RDMA/mlx5: Use a union inside mlx5_ib_mr
[linux-2.6-block.git] drivers/infiniband/hw/mlx5/mr.c
/*
 * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved.
 * Copyright (c) 2020, Intel Corporation. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses. You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 * Redistribution and use in source and binary forms, with or
 * without modification, are permitted provided that the following
 * conditions are met:
 *
 * - Redistributions of source code must retain the above
 *   copyright notice, this list of conditions and the following
 *   disclaimer.
 *
 * - Redistributions in binary form must reproduce the above
 *   copyright notice, this list of conditions and the following
 *   disclaimer in the documentation and/or other materials
 *   provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <linux/kref.h>
#include <linux/random.h>
#include <linux/debugfs.h>
#include <linux/export.h>
#include <linux/delay.h>
#include <linux/dma-buf.h>
#include <linux/dma-resv.h>
#include <rdma/ib_umem.h>
#include <rdma/ib_umem_odp.h>
#include <rdma/ib_verbs.h>
#include "mlx5_ib.h"

/*
 * We can't use an array for xlt_emergency_page because dma_map_single doesn't
 * work on kernel module memory.
 */
void *xlt_emergency_page;
static DEFINE_MUTEX(xlt_emergency_page_mutex);

enum {
	MAX_PENDING_REG_MR = 8,
};

#define MLX5_UMR_ALIGN 2048

static void
create_mkey_callback(int status, struct mlx5_async_work *context);
static struct mlx5_ib_mr *reg_create(struct ib_pd *pd, struct ib_umem *umem,
				     u64 iova, int access_flags,
				     unsigned int page_size, bool populate);

static void set_mkc_access_pd_addr_fields(void *mkc, int acc, u64 start_addr,
					  struct ib_pd *pd)
{
	struct mlx5_ib_dev *dev = to_mdev(pd->device);

	MLX5_SET(mkc, mkc, a, !!(acc & IB_ACCESS_REMOTE_ATOMIC));
	MLX5_SET(mkc, mkc, rw, !!(acc & IB_ACCESS_REMOTE_WRITE));
	MLX5_SET(mkc, mkc, rr, !!(acc & IB_ACCESS_REMOTE_READ));
	MLX5_SET(mkc, mkc, lw, !!(acc & IB_ACCESS_LOCAL_WRITE));
	MLX5_SET(mkc, mkc, lr, 1);

	if (MLX5_CAP_GEN(dev->mdev, relaxed_ordering_write))
		MLX5_SET(mkc, mkc, relaxed_ordering_write,
			 !!(acc & IB_ACCESS_RELAXED_ORDERING));
	if (MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read))
		MLX5_SET(mkc, mkc, relaxed_ordering_read,
			 !!(acc & IB_ACCESS_RELAXED_ORDERING));

	MLX5_SET(mkc, mkc, pd, to_mpd(pd)->pdn);
	MLX5_SET(mkc, mkc, qpn, 0xffffff);
	MLX5_SET64(mkc, mkc, start_addr, start_addr);
}

static void
assign_mkey_variant(struct mlx5_ib_dev *dev, struct mlx5_core_mkey *mkey,
		    u32 *in)
{
	u8 key = atomic_inc_return(&dev->mkey_var);
	void *mkc;

	mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
	MLX5_SET(mkc, mkc, mkey_7_0, key);
	mkey->key = key;
}

static int
mlx5_ib_create_mkey(struct mlx5_ib_dev *dev, struct mlx5_core_mkey *mkey,
		    u32 *in, int inlen)
{
	assign_mkey_variant(dev, mkey, in);
	return mlx5_core_create_mkey(dev->mdev, mkey, in, inlen);
}

static int
mlx5_ib_create_mkey_cb(struct mlx5_ib_dev *dev,
		       struct mlx5_core_mkey *mkey,
		       struct mlx5_async_ctx *async_ctx,
		       u32 *in, int inlen, u32 *out, int outlen,
		       struct mlx5_async_work *context)
{
	MLX5_SET(create_mkey_in, in, opcode, MLX5_CMD_OP_CREATE_MKEY);
	assign_mkey_variant(dev, mkey, in);
	return mlx5_cmd_exec_cb(async_ctx, in, inlen, out, outlen,
				create_mkey_callback, context);
}

static void clean_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr);
static void dereg_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr);
static int mr_cache_max_order(struct mlx5_ib_dev *dev);
static void queue_adjust_cache_locked(struct mlx5_cache_ent *ent);

static bool umr_can_use_indirect_mkey(struct mlx5_ib_dev *dev)
{
	return !MLX5_CAP_GEN(dev->mdev, umr_indirect_mkey_disabled);
}

static int destroy_mkey(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
{
	WARN_ON(xa_load(&dev->odp_mkeys, mlx5_base_mkey(mr->mmkey.key)));

	return mlx5_core_destroy_mkey(dev->mdev, &mr->mmkey);
}

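/*
 * Completion handler for mkeys created asynchronously by add_keys(): on
 * success the new MR is added to its cache entry, on failure the cache
 * refill is throttled via dev->fill_delay and the delay timer.
 */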
static void create_mkey_callback(int status, struct mlx5_async_work *context)
{
	struct mlx5_ib_mr *mr =
		container_of(context, struct mlx5_ib_mr, cb_work);
	struct mlx5_cache_ent *ent = mr->cache_ent;
	struct mlx5_ib_dev *dev = ent->dev;
	unsigned long flags;

	if (status) {
		mlx5_ib_warn(dev, "async reg mr failed. status %d\n", status);
		kfree(mr);
		spin_lock_irqsave(&ent->lock, flags);
		ent->pending--;
		WRITE_ONCE(dev->fill_delay, 1);
		spin_unlock_irqrestore(&ent->lock, flags);
		mod_timer(&dev->delay_timer, jiffies + HZ);
		return;
	}

	mr->mmkey.type = MLX5_MKEY_MR;
	mr->mmkey.key |= mlx5_idx_to_mkey(
		MLX5_GET(create_mkey_out, mr->out, mkey_index));
	init_waitqueue_head(&mr->mmkey.wait);

	WRITE_ONCE(dev->cache.last_add, jiffies);

	spin_lock_irqsave(&ent->lock, flags);
	list_add_tail(&mr->list, &ent->head);
	ent->available_mrs++;
	ent->total_mrs++;
	/* If we are doing fill_to_high_water then keep going. */
	queue_adjust_cache_locked(ent);
	ent->pending--;
	spin_unlock_irqrestore(&ent->lock, flags);
}

static struct mlx5_ib_mr *alloc_cache_mr(struct mlx5_cache_ent *ent, void *mkc)
{
	struct mlx5_ib_mr *mr;

	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
	if (!mr)
		return NULL;
	mr->cache_ent = ent;

	set_mkc_access_pd_addr_fields(mkc, 0, 0, ent->dev->umrc.pd);
	MLX5_SET(mkc, mkc, free, 1);
	MLX5_SET(mkc, mkc, umr_en, 1);
	MLX5_SET(mkc, mkc, access_mode_1_0, ent->access_mode & 0x3);
	MLX5_SET(mkc, mkc, access_mode_4_2, (ent->access_mode >> 2) & 0x7);

	MLX5_SET(mkc, mkc, translations_octword_size, ent->xlt);
	MLX5_SET(mkc, mkc, log_page_size, ent->page);
	return mr;
}

/* Asynchronously schedule new MRs to be populated in the cache. */
static int add_keys(struct mlx5_cache_ent *ent, unsigned int num)
{
	size_t inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
	struct mlx5_ib_mr *mr;
	void *mkc;
	u32 *in;
	int err = 0;
	int i;

	in = kzalloc(inlen, GFP_KERNEL);
	if (!in)
		return -ENOMEM;

	mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
	for (i = 0; i < num; i++) {
		mr = alloc_cache_mr(ent, mkc);
		if (!mr) {
			err = -ENOMEM;
			break;
		}
		spin_lock_irq(&ent->lock);
		if (ent->pending >= MAX_PENDING_REG_MR) {
			err = -EAGAIN;
			spin_unlock_irq(&ent->lock);
			kfree(mr);
			break;
		}
		ent->pending++;
		spin_unlock_irq(&ent->lock);
		err = mlx5_ib_create_mkey_cb(ent->dev, &mr->mmkey,
					     &ent->dev->async_ctx, in, inlen,
					     mr->out, sizeof(mr->out),
					     &mr->cb_work);
		if (err) {
			spin_lock_irq(&ent->lock);
			ent->pending--;
			spin_unlock_irq(&ent->lock);
			mlx5_ib_warn(ent->dev, "create mkey failed %d\n", err);
			kfree(mr);
			break;
		}
	}

	kfree(in);
	return err;
}

/* Synchronously create an MR in the cache */
static struct mlx5_ib_mr *create_cache_mr(struct mlx5_cache_ent *ent)
{
	size_t inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
	struct mlx5_ib_mr *mr;
	void *mkc;
	u32 *in;
	int err;

	in = kzalloc(inlen, GFP_KERNEL);
	if (!in)
		return ERR_PTR(-ENOMEM);
	mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);

	mr = alloc_cache_mr(ent, mkc);
	if (!mr) {
		err = -ENOMEM;
		goto free_in;
	}

	err = mlx5_core_create_mkey(ent->dev->mdev, &mr->mmkey, in, inlen);
	if (err)
		goto free_mr;

	mr->mmkey.type = MLX5_MKEY_MR;
	WRITE_ONCE(ent->dev->cache.last_add, jiffies);
	spin_lock_irq(&ent->lock);
	ent->total_mrs++;
	spin_unlock_irq(&ent->lock);
	kfree(in);
	return mr;
free_mr:
	kfree(mr);
free_in:
	kfree(in);
	return ERR_PTR(err);
}

static void remove_cache_mr_locked(struct mlx5_cache_ent *ent)
{
	struct mlx5_ib_mr *mr;

	lockdep_assert_held(&ent->lock);
	if (list_empty(&ent->head))
		return;
	mr = list_first_entry(&ent->head, struct mlx5_ib_mr, list);
	list_del(&mr->list);
	ent->available_mrs--;
	ent->total_mrs--;
	spin_unlock_irq(&ent->lock);
	mlx5_core_destroy_mkey(ent->dev->mdev, &mr->mmkey);
	kfree(mr);
	spin_lock_irq(&ent->lock);
}

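/*
 * Grow or shrink the pool of available MRs in an entry until it reaches
 * @target (or 2 * ent->limit when @limit_fill is set). Called with ent->lock
 * held; the lock is dropped around the blocking add/remove steps.
 */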
static int resize_available_mrs(struct mlx5_cache_ent *ent, unsigned int target,
				bool limit_fill)
{
	int err;

	lockdep_assert_held(&ent->lock);

	while (true) {
		if (limit_fill)
			target = ent->limit * 2;
		if (target == ent->available_mrs + ent->pending)
			return 0;
		if (target > ent->available_mrs + ent->pending) {
			u32 todo = target - (ent->available_mrs + ent->pending);

			spin_unlock_irq(&ent->lock);
			err = add_keys(ent, todo);
			if (err == -EAGAIN)
				usleep_range(3000, 5000);
			spin_lock_irq(&ent->lock);
			if (err) {
				if (err != -EAGAIN)
					return err;
			} else
				return 0;
		} else {
			remove_cache_mr_locked(ent);
		}
	}
}

static ssize_t size_write(struct file *filp, const char __user *buf,
			  size_t count, loff_t *pos)
{
	struct mlx5_cache_ent *ent = filp->private_data;
	u32 target;
	int err;

	err = kstrtou32_from_user(buf, count, 0, &target);
	if (err)
		return err;

	/*
	 * Target is the new value of total_mrs the user requests, however we
	 * cannot free MRs that are in use. Compute the target value for
	 * available_mrs.
	 */
	spin_lock_irq(&ent->lock);
	if (target < ent->total_mrs - ent->available_mrs) {
		err = -EINVAL;
		goto err_unlock;
	}
	target = target - (ent->total_mrs - ent->available_mrs);
	if (target < ent->limit || target > ent->limit * 2) {
		err = -EINVAL;
		goto err_unlock;
	}
	err = resize_available_mrs(ent, target, false);
	if (err)
		goto err_unlock;
	spin_unlock_irq(&ent->lock);

	return count;

err_unlock:
	spin_unlock_irq(&ent->lock);
	return err;
}

static ssize_t size_read(struct file *filp, char __user *buf, size_t count,
			 loff_t *pos)
{
	struct mlx5_cache_ent *ent = filp->private_data;
	char lbuf[20];
	int err;

	err = snprintf(lbuf, sizeof(lbuf), "%d\n", ent->total_mrs);
	if (err < 0)
		return err;

	return simple_read_from_buffer(buf, count, pos, lbuf, err);
}

static const struct file_operations size_fops = {
	.owner	= THIS_MODULE,
	.open	= simple_open,
	.write	= size_write,
	.read	= size_read,
};

static ssize_t limit_write(struct file *filp, const char __user *buf,
			   size_t count, loff_t *pos)
{
	struct mlx5_cache_ent *ent = filp->private_data;
	u32 var;
	int err;

	err = kstrtou32_from_user(buf, count, 0, &var);
	if (err)
		return err;

	/*
	 * Upon set we immediately fill the cache to high water mark implied by
	 * the limit.
	 */
	spin_lock_irq(&ent->lock);
	ent->limit = var;
	err = resize_available_mrs(ent, 0, true);
	spin_unlock_irq(&ent->lock);
	if (err)
		return err;
	return count;
}

static ssize_t limit_read(struct file *filp, char __user *buf, size_t count,
			  loff_t *pos)
{
	struct mlx5_cache_ent *ent = filp->private_data;
	char lbuf[20];
	int err;

	err = snprintf(lbuf, sizeof(lbuf), "%d\n", ent->limit);
	if (err < 0)
		return err;

	return simple_read_from_buffer(buf, count, pos, lbuf, err);
}

static const struct file_operations limit_fops = {
	.owner	= THIS_MODULE,
	.open	= simple_open,
	.write	= limit_write,
	.read	= limit_read,
};

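/*
 * Returns true if any cache entry is still below its low water mark and
 * therefore refilling; used to postpone garbage collection of excess MRs.
 */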
static bool someone_adding(struct mlx5_mr_cache *cache)
{
	unsigned int i;

	for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) {
		struct mlx5_cache_ent *ent = &cache->ent[i];
		bool ret;

		spin_lock_irq(&ent->lock);
		ret = ent->available_mrs < ent->limit;
		spin_unlock_irq(&ent->lock);
		if (ret)
			return true;
	}
	return false;
}

/*
 * Check if the bucket is outside the high/low water mark and schedule an async
 * update. The cache refill has hysteresis: once the low water mark is hit it
 * is refilled up to the high mark.
 */
static void queue_adjust_cache_locked(struct mlx5_cache_ent *ent)
{
	lockdep_assert_held(&ent->lock);

	if (ent->disabled || READ_ONCE(ent->dev->fill_delay))
		return;
	if (ent->available_mrs < ent->limit) {
		ent->fill_to_high_water = true;
		queue_work(ent->dev->cache.wq, &ent->work);
	} else if (ent->fill_to_high_water &&
		   ent->available_mrs + ent->pending < 2 * ent->limit) {
		/*
		 * Once we start populating due to hitting a low water mark
		 * continue until we pass the high water mark.
		 */
		queue_work(ent->dev->cache.wq, &ent->work);
	} else if (ent->available_mrs == 2 * ent->limit) {
		ent->fill_to_high_water = false;
	} else if (ent->available_mrs > 2 * ent->limit) {
		/* Queue deletion of excess entries */
		ent->fill_to_high_water = false;
		if (ent->pending)
			queue_delayed_work(ent->dev->cache.wq, &ent->dwork,
					   msecs_to_jiffies(1000));
		else
			queue_work(ent->dev->cache.wq, &ent->work);
	}
}

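/*
 * Worker that keeps a cache entry between its water marks: it adds keys while
 * filling towards 2 * limit and removes excess MRs when above it, deferring
 * the cleanup when the system is busy.
 */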
static void __cache_work_func(struct mlx5_cache_ent *ent)
{
	struct mlx5_ib_dev *dev = ent->dev;
	struct mlx5_mr_cache *cache = &dev->cache;
	int err;

	spin_lock_irq(&ent->lock);
	if (ent->disabled)
		goto out;

	if (ent->fill_to_high_water &&
	    ent->available_mrs + ent->pending < 2 * ent->limit &&
	    !READ_ONCE(dev->fill_delay)) {
		spin_unlock_irq(&ent->lock);
		err = add_keys(ent, 1);
		spin_lock_irq(&ent->lock);
		if (ent->disabled)
			goto out;
		if (err) {
			/*
			 * EAGAIN only happens if pending is positive, so we
			 * will be rescheduled from reg_mr_callback(). The only
			 * failure path here is ENOMEM.
			 */
			if (err != -EAGAIN) {
				mlx5_ib_warn(
					dev,
					"command failed order %d, err %d\n",
					ent->order, err);
				queue_delayed_work(cache->wq, &ent->dwork,
						   msecs_to_jiffies(1000));
			}
		}
	} else if (ent->available_mrs > 2 * ent->limit) {
		bool need_delay;

		/*
		 * The remove_cache_mr() logic is performed as a garbage
		 * collection task. Such a task is intended to run when no
		 * other active processes are running.
		 *
		 * need_resched() returns true if there are user tasks to be
		 * activated in the near future.
		 *
		 * In that case we don't execute remove_cache_mr() and postpone
		 * the garbage collection work, trying to run it in the next
		 * cycle in order to free CPU resources for other tasks.
		 */
		spin_unlock_irq(&ent->lock);
		need_delay = need_resched() || someone_adding(cache) ||
			     time_after(jiffies,
					READ_ONCE(cache->last_add) + 300 * HZ);
		spin_lock_irq(&ent->lock);
		if (ent->disabled)
			goto out;
		if (need_delay)
			queue_delayed_work(cache->wq, &ent->dwork, 300 * HZ);
		remove_cache_mr_locked(ent);
		queue_adjust_cache_locked(ent);
	}
out:
	spin_unlock_irq(&ent->lock);
}

static void delayed_cache_work_func(struct work_struct *work)
{
	struct mlx5_cache_ent *ent;

	ent = container_of(work, struct mlx5_cache_ent, dwork.work);
	__cache_work_func(ent);
}

static void cache_work_func(struct work_struct *work)
{
	struct mlx5_cache_ent *ent;

	ent = container_of(work, struct mlx5_cache_ent, work);
	__cache_work_func(ent);
}

/* Allocate a special entry from the cache */
struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev,
				       unsigned int entry, int access_flags)
{
	struct mlx5_mr_cache *cache = &dev->cache;
	struct mlx5_cache_ent *ent;
	struct mlx5_ib_mr *mr;

	if (WARN_ON(entry <= MR_CACHE_LAST_STD_ENTRY ||
		    entry >= ARRAY_SIZE(cache->ent)))
		return ERR_PTR(-EINVAL);

	/* Matches access in alloc_cache_mr() */
	if (!mlx5_ib_can_reconfig_with_umr(dev, 0, access_flags))
		return ERR_PTR(-EOPNOTSUPP);

	ent = &cache->ent[entry];
	spin_lock_irq(&ent->lock);
	if (list_empty(&ent->head)) {
		spin_unlock_irq(&ent->lock);
		mr = create_cache_mr(ent);
		if (IS_ERR(mr))
			return mr;
	} else {
		mr = list_first_entry(&ent->head, struct mlx5_ib_mr, list);
		list_del(&mr->list);
		ent->available_mrs--;
		queue_adjust_cache_locked(ent);
		spin_unlock_irq(&ent->lock);

		mlx5_clear_mr(mr);
	}
	mr->access_flags = access_flags;
	return mr;
}

/* Return an MR already available in the cache */
static struct mlx5_ib_mr *get_cache_mr(struct mlx5_cache_ent *req_ent)
{
	struct mlx5_ib_dev *dev = req_ent->dev;
	struct mlx5_ib_mr *mr = NULL;
	struct mlx5_cache_ent *ent = req_ent;

	/* Try larger MR pools from the cache to satisfy the allocation */
	for (; ent != &dev->cache.ent[MR_CACHE_LAST_STD_ENTRY + 1]; ent++) {
		mlx5_ib_dbg(dev, "order %u, cache index %zu\n", ent->order,
			    ent - dev->cache.ent);

		spin_lock_irq(&ent->lock);
		if (!list_empty(&ent->head)) {
			mr = list_first_entry(&ent->head, struct mlx5_ib_mr,
					      list);
			list_del(&mr->list);
			ent->available_mrs--;
			queue_adjust_cache_locked(ent);
			spin_unlock_irq(&ent->lock);
			mlx5_clear_mr(mr);
			return mr;
		}
		queue_adjust_cache_locked(ent);
		spin_unlock_irq(&ent->lock);
	}
	req_ent->miss++;
	return NULL;
}

static void detach_mr_from_cache(struct mlx5_ib_mr *mr)
{
	struct mlx5_cache_ent *ent = mr->cache_ent;

	mr->cache_ent = NULL;
	spin_lock_irq(&ent->lock);
	ent->total_mrs--;
	spin_unlock_irq(&ent->lock);
}

void mlx5_mr_cache_free(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
{
	struct mlx5_cache_ent *ent = mr->cache_ent;

	if (!ent)
		return;

	if (mlx5_mr_cache_invalidate(mr)) {
		detach_mr_from_cache(mr);
		destroy_mkey(dev, mr);
		kfree(mr);
		return;
	}

	spin_lock_irq(&ent->lock);
	list_add_tail(&mr->list, &ent->head);
	ent->available_mrs++;
	queue_adjust_cache_locked(ent);
	spin_unlock_irq(&ent->lock);
}

static void clean_keys(struct mlx5_ib_dev *dev, int c)
{
	struct mlx5_mr_cache *cache = &dev->cache;
	struct mlx5_cache_ent *ent = &cache->ent[c];
	struct mlx5_ib_mr *tmp_mr;
	struct mlx5_ib_mr *mr;
	LIST_HEAD(del_list);

	cancel_delayed_work(&ent->dwork);
	while (1) {
		spin_lock_irq(&ent->lock);
		if (list_empty(&ent->head)) {
			spin_unlock_irq(&ent->lock);
			break;
		}
		mr = list_first_entry(&ent->head, struct mlx5_ib_mr, list);
		list_move(&mr->list, &del_list);
		ent->available_mrs--;
		ent->total_mrs--;
		spin_unlock_irq(&ent->lock);
		mlx5_core_destroy_mkey(dev->mdev, &mr->mmkey);
	}

	list_for_each_entry_safe(mr, tmp_mr, &del_list, list) {
		list_del(&mr->list);
		kfree(mr);
	}
}

static void mlx5_mr_cache_debugfs_cleanup(struct mlx5_ib_dev *dev)
{
	if (!mlx5_debugfs_root || dev->is_rep)
		return;

	debugfs_remove_recursive(dev->cache.root);
	dev->cache.root = NULL;
}

static void mlx5_mr_cache_debugfs_init(struct mlx5_ib_dev *dev)
{
	struct mlx5_mr_cache *cache = &dev->cache;
	struct mlx5_cache_ent *ent;
	struct dentry *dir;
	int i;

	if (!mlx5_debugfs_root || dev->is_rep)
		return;

	cache->root = debugfs_create_dir("mr_cache", dev->mdev->priv.dbg_root);

	for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) {
		ent = &cache->ent[i];
		sprintf(ent->name, "%d", ent->order);
		dir = debugfs_create_dir(ent->name, cache->root);
		debugfs_create_file("size", 0600, dir, ent, &size_fops);
		debugfs_create_file("limit", 0600, dir, ent, &limit_fops);
		debugfs_create_u32("cur", 0400, dir, &ent->available_mrs);
		debugfs_create_u32("miss", 0600, dir, &ent->miss);
	}
}

static void delay_time_func(struct timer_list *t)
{
	struct mlx5_ib_dev *dev = from_timer(dev, t, delay_timer);

	WRITE_ONCE(dev->fill_delay, 0);
}

int mlx5_mr_cache_init(struct mlx5_ib_dev *dev)
{
	struct mlx5_mr_cache *cache = &dev->cache;
	struct mlx5_cache_ent *ent;
	int i;

	mutex_init(&dev->slow_path_mutex);
	cache->wq = alloc_ordered_workqueue("mkey_cache", WQ_MEM_RECLAIM);
	if (!cache->wq) {
		mlx5_ib_warn(dev, "failed to create work queue\n");
		return -ENOMEM;
	}

	mlx5_cmd_init_async_ctx(dev->mdev, &dev->async_ctx);
	timer_setup(&dev->delay_timer, delay_time_func, 0);
	for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) {
		ent = &cache->ent[i];
		INIT_LIST_HEAD(&ent->head);
		spin_lock_init(&ent->lock);
		ent->order = i + 2;
		ent->dev = dev;
		ent->limit = 0;

		INIT_WORK(&ent->work, cache_work_func);
		INIT_DELAYED_WORK(&ent->dwork, delayed_cache_work_func);

		if (i > MR_CACHE_LAST_STD_ENTRY) {
			mlx5_odp_init_mr_cache_entry(ent);
			continue;
		}

		if (ent->order > mr_cache_max_order(dev))
			continue;

		ent->page = PAGE_SHIFT;
		ent->xlt = (1 << ent->order) * sizeof(struct mlx5_mtt) /
			   MLX5_IB_UMR_OCTOWORD;
		ent->access_mode = MLX5_MKC_ACCESS_MODE_MTT;
		if ((dev->mdev->profile->mask & MLX5_PROF_MASK_MR_CACHE) &&
		    !dev->is_rep && mlx5_core_is_pf(dev->mdev) &&
		    mlx5_ib_can_load_pas_with_umr(dev, 0))
			ent->limit = dev->mdev->profile->mr_cache[i].limit;
		else
			ent->limit = 0;
		spin_lock_irq(&ent->lock);
		queue_adjust_cache_locked(ent);
		spin_unlock_irq(&ent->lock);
	}

	mlx5_mr_cache_debugfs_init(dev);

	return 0;
}

int mlx5_mr_cache_cleanup(struct mlx5_ib_dev *dev)
{
	unsigned int i;

	if (!dev->cache.wq)
		return 0;

	for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) {
		struct mlx5_cache_ent *ent = &dev->cache.ent[i];

		spin_lock_irq(&ent->lock);
		ent->disabled = true;
		spin_unlock_irq(&ent->lock);
		cancel_work_sync(&ent->work);
		cancel_delayed_work_sync(&ent->dwork);
	}

	mlx5_mr_cache_debugfs_cleanup(dev);
	mlx5_cmd_cleanup_async_ctx(&dev->async_ctx);

	for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++)
		clean_keys(dev, i);

	destroy_workqueue(dev->cache.wq);
	del_timer_sync(&dev->delay_timer);

	return 0;
}

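/*
 * Create the DMA MR for a PD: an mkey in PA access mode with length64 set,
 * covering the whole address space starting at offset zero.
 */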
struct ib_mr *mlx5_ib_get_dma_mr(struct ib_pd *pd, int acc)
{
	struct mlx5_ib_dev *dev = to_mdev(pd->device);
	int inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
	struct mlx5_ib_mr *mr;
	void *mkc;
	u32 *in;
	int err;

	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
	if (!mr)
		return ERR_PTR(-ENOMEM);

	in = kzalloc(inlen, GFP_KERNEL);
	if (!in) {
		err = -ENOMEM;
		goto err_free;
	}

	mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);

	MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_PA);
	MLX5_SET(mkc, mkc, length64, 1);
	set_mkc_access_pd_addr_fields(mkc, acc, 0, pd);

	err = mlx5_ib_create_mkey(dev, &mr->mmkey, in, inlen);
	if (err)
		goto err_in;

	kfree(in);
	mr->mmkey.type = MLX5_MKEY_MR;
	mr->ibmr.lkey = mr->mmkey.key;
	mr->ibmr.rkey = mr->mmkey.key;
	mr->umem = NULL;

	return &mr->ibmr;

err_in:
	kfree(in);

err_free:
	kfree(mr);

	return ERR_PTR(err);
}

static int get_octo_len(u64 addr, u64 len, int page_shift)
{
	u64 page_size = 1ULL << page_shift;
	u64 offset;
	int npages;

	offset = addr & (page_size - 1);
	npages = ALIGN(len + offset, page_size) >> page_shift;
	return (npages + 1) / 2;
}

static int mr_cache_max_order(struct mlx5_ib_dev *dev)
{
	if (MLX5_CAP_GEN(dev->mdev, umr_extended_translation_offset))
		return MR_CACHE_LAST_STD_ENTRY + 2;
	return MLX5_MAX_UMR_SHIFT;
}

static void mlx5_ib_umr_done(struct ib_cq *cq, struct ib_wc *wc)
{
	struct mlx5_ib_umr_context *context =
		container_of(wc->wr_cqe, struct mlx5_ib_umr_context, cqe);

	context->status = wc->status;
	complete(&context->done);
}

static inline void mlx5_ib_init_umr_context(struct mlx5_ib_umr_context *context)
{
	context->cqe.done = mlx5_ib_umr_done;
	context->status = -1;
	init_completion(&context->done);
}

static int mlx5_ib_post_send_wait(struct mlx5_ib_dev *dev,
				  struct mlx5_umr_wr *umrwr)
{
	struct umr_common *umrc = &dev->umrc;
	const struct ib_send_wr *bad;
	int err;
	struct mlx5_ib_umr_context umr_context;

	mlx5_ib_init_umr_context(&umr_context);
	umrwr->wr.wr_cqe = &umr_context.cqe;

	down(&umrc->sem);
	err = ib_post_send(umrc->qp, &umrwr->wr, &bad);
	if (err) {
		mlx5_ib_warn(dev, "UMR post send failed, err %d\n", err);
	} else {
		wait_for_completion(&umr_context.done);
		if (umr_context.status != IB_WC_SUCCESS) {
			mlx5_ib_warn(dev, "reg umr failed (%u)\n",
				     umr_context.status);
			err = -EFAULT;
		}
	}
	up(&umrc->sem);
	return err;
}

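/*
 * Map a log2 number of DMA blocks to the cache entry that can hold MRs of at
 * least that size; returns NULL when the order exceeds the largest standard
 * cache entry.
 */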
static struct mlx5_cache_ent *mr_cache_ent_from_order(struct mlx5_ib_dev *dev,
						      unsigned int order)
{
	struct mlx5_mr_cache *cache = &dev->cache;

	if (order < cache->ent[0].order)
		return &cache->ent[0];
	order = order - cache->ent[0].order;
	if (order > MR_CACHE_LAST_STD_ENTRY)
		return NULL;
	return &cache->ent[order];
}

static void set_mr_fields(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr,
			  u64 length, int access_flags)
{
	mr->ibmr.lkey = mr->mmkey.key;
	mr->ibmr.rkey = mr->mmkey.key;
	mr->ibmr.length = length;
	mr->ibmr.device = &dev->ib_dev;
	mr->access_flags = access_flags;
}

static unsigned int mlx5_umem_dmabuf_default_pgsz(struct ib_umem *umem,
						  u64 iova)
{
	/*
	 * The alignment of iova has already been checked upon entering
	 * UVERBS_METHOD_REG_DMABUF_MR
	 */
	umem->iova = iova;
	return PAGE_SIZE;
}

static struct mlx5_ib_mr *alloc_cacheable_mr(struct ib_pd *pd,
					     struct ib_umem *umem, u64 iova,
					     int access_flags)
{
	struct mlx5_ib_dev *dev = to_mdev(pd->device);
	struct mlx5_cache_ent *ent;
	struct mlx5_ib_mr *mr;
	unsigned int page_size;

	if (umem->is_dmabuf)
		page_size = mlx5_umem_dmabuf_default_pgsz(umem, iova);
	else
		page_size = mlx5_umem_find_best_pgsz(umem, mkc, log_page_size,
						     0, iova);
	if (WARN_ON(!page_size))
		return ERR_PTR(-EINVAL);
	ent = mr_cache_ent_from_order(
		dev, order_base_2(ib_umem_num_dma_blocks(umem, page_size)));
	/*
	 * Matches access in alloc_cache_mr(). If the MR can't come from the
	 * cache then synchronously create an uncached one.
	 */
	if (!ent || ent->limit == 0 ||
	    !mlx5_ib_can_reconfig_with_umr(dev, 0, access_flags)) {
		mutex_lock(&dev->slow_path_mutex);
		mr = reg_create(pd, umem, iova, access_flags, page_size, false);
		mutex_unlock(&dev->slow_path_mutex);
		return mr;
	}

	mr = get_cache_mr(ent);
	if (!mr) {
		mr = create_cache_mr(ent);
		/*
		 * The above already tried to do the same stuff as reg_create(),
		 * no reason to try it again.
		 */
		if (IS_ERR(mr))
			return mr;
	}

	mr->ibmr.pd = pd;
	mr->umem = umem;
	mr->mmkey.iova = iova;
	mr->mmkey.size = umem->length;
	mr->mmkey.pd = to_mpd(pd)->pdn;
	mr->page_shift = order_base_2(page_size);
	set_mr_fields(dev, mr, umem->length, access_flags);

	return mr;
}

#define MLX5_MAX_UMR_CHUNK ((1 << (MLX5_MAX_UMR_SHIFT + 4)) - \
			    MLX5_UMR_MTT_ALIGNMENT)
#define MLX5_SPARE_UMR_CHUNK 0x10000

/*
 * Allocate a temporary buffer to hold the per-page information to transfer to
 * HW. For efficiency this should be as large as it can be, but buffer
 * allocation failure is not allowed, so try smaller sizes.
 */
static void *mlx5_ib_alloc_xlt(size_t *nents, size_t ent_size, gfp_t gfp_mask)
{
	const size_t xlt_chunk_align =
		MLX5_UMR_MTT_ALIGNMENT / sizeof(ent_size);
	size_t size;
	void *res = NULL;

	static_assert(PAGE_SIZE % MLX5_UMR_MTT_ALIGNMENT == 0);

	/*
	 * MLX5_IB_UPD_XLT_ATOMIC doesn't signal an atomic context just that the
	 * allocation can't trigger any kind of reclaim.
	 */
	might_sleep();

	gfp_mask |= __GFP_ZERO;

	/*
	 * If the system already has a suitable high order page then just use
	 * that, but don't try hard to create one. This max is about 1M, so a
	 * free x86 huge page will satisfy it.
	 */
	size = min_t(size_t, ent_size * ALIGN(*nents, xlt_chunk_align),
		     MLX5_MAX_UMR_CHUNK);
	*nents = size / ent_size;
	res = (void *)__get_free_pages(gfp_mask | __GFP_NOWARN,
				       get_order(size));
	if (res)
		return res;

	if (size > MLX5_SPARE_UMR_CHUNK) {
		size = MLX5_SPARE_UMR_CHUNK;
		*nents = get_order(size) / ent_size;
		res = (void *)__get_free_pages(gfp_mask | __GFP_NOWARN,
					       get_order(size));
		if (res)
			return res;
	}

	*nents = PAGE_SIZE / ent_size;
	res = (void *)__get_free_page(gfp_mask);
	if (res)
		return res;

	mutex_lock(&xlt_emergency_page_mutex);
	memset(xlt_emergency_page, 0, PAGE_SIZE);
	return xlt_emergency_page;
}

static void mlx5_ib_free_xlt(void *xlt, size_t length)
{
	if (xlt == xlt_emergency_page) {
		mutex_unlock(&xlt_emergency_page_mutex);
		return;
	}

	free_pages((unsigned long)xlt, get_order(length));
}

/*
 * Create a MLX5_IB_SEND_UMR_UPDATE_XLT work request and XLT buffer ready for
 * submission.
 */
static void *mlx5_ib_create_xlt_wr(struct mlx5_ib_mr *mr,
				   struct mlx5_umr_wr *wr, struct ib_sge *sg,
				   size_t nents, size_t ent_size,
				   unsigned int flags)
{
	struct mlx5_ib_dev *dev = mr_to_mdev(mr);
	struct device *ddev = &dev->mdev->pdev->dev;
	dma_addr_t dma;
	void *xlt;

	xlt = mlx5_ib_alloc_xlt(&nents, ent_size,
				flags & MLX5_IB_UPD_XLT_ATOMIC ? GFP_ATOMIC :
								 GFP_KERNEL);
	sg->length = nents * ent_size;
	dma = dma_map_single(ddev, xlt, sg->length, DMA_TO_DEVICE);
	if (dma_mapping_error(ddev, dma)) {
		mlx5_ib_err(dev, "unable to map DMA during XLT update.\n");
		mlx5_ib_free_xlt(xlt, sg->length);
		return NULL;
	}
	sg->addr = dma;
	sg->lkey = dev->umrc.pd->local_dma_lkey;

	memset(wr, 0, sizeof(*wr));
	wr->wr.send_flags = MLX5_IB_SEND_UMR_UPDATE_XLT;
	if (!(flags & MLX5_IB_UPD_XLT_ENABLE))
		wr->wr.send_flags |= MLX5_IB_SEND_UMR_FAIL_IF_FREE;
	wr->wr.sg_list = sg;
	wr->wr.num_sge = 1;
	wr->wr.opcode = MLX5_IB_WR_UMR;
	wr->pd = mr->ibmr.pd;
	wr->mkey = mr->mmkey.key;
	wr->length = mr->mmkey.size;
	wr->virt_addr = mr->mmkey.iova;
	wr->access_flags = mr->access_flags;
	wr->page_shift = mr->page_shift;
	wr->xlt_size = sg->length;
	return xlt;
}

static void mlx5_ib_unmap_free_xlt(struct mlx5_ib_dev *dev, void *xlt,
				   struct ib_sge *sg)
{
	struct device *ddev = &dev->mdev->pdev->dev;

	dma_unmap_single(ddev, sg->addr, sg->length, DMA_TO_DEVICE);
	mlx5_ib_free_xlt(xlt, sg->length);
}

static unsigned int xlt_wr_final_send_flags(unsigned int flags)
{
	unsigned int res = 0;

	if (flags & MLX5_IB_UPD_XLT_ENABLE)
		res |= MLX5_IB_SEND_UMR_ENABLE_MR |
		       MLX5_IB_SEND_UMR_UPDATE_PD_ACCESS |
		       MLX5_IB_SEND_UMR_UPDATE_TRANSLATION;
	if (flags & MLX5_IB_UPD_XLT_PD || flags & MLX5_IB_UPD_XLT_ACCESS)
		res |= MLX5_IB_SEND_UMR_UPDATE_PD_ACCESS;
	if (flags & MLX5_IB_UPD_XLT_ADDR)
		res |= MLX5_IB_SEND_UMR_UPDATE_TRANSLATION;
	return res;
}

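/*
 * Push a window of XLT entries (MTTs, or KLMs for indirect mkeys) of an ODP
 * MR to the HW via UMR, chunked by the temporary XLT buffer; @idx and @npages
 * are aligned to MLX5_UMR_MTT_ALIGNMENT granularity.
 */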
int mlx5_ib_update_xlt(struct mlx5_ib_mr *mr, u64 idx, int npages,
		       int page_shift, int flags)
{
	struct mlx5_ib_dev *dev = mr_to_mdev(mr);
	struct device *ddev = &dev->mdev->pdev->dev;
	void *xlt;
	struct mlx5_umr_wr wr;
	struct ib_sge sg;
	int err = 0;
	int desc_size = (flags & MLX5_IB_UPD_XLT_INDIRECT)
				? sizeof(struct mlx5_klm)
				: sizeof(struct mlx5_mtt);
	const int page_align = MLX5_UMR_MTT_ALIGNMENT / desc_size;
	const int page_mask = page_align - 1;
	size_t pages_mapped = 0;
	size_t pages_to_map = 0;
	size_t pages_iter;
	size_t size_to_map = 0;
	size_t orig_sg_length;

	if ((flags & MLX5_IB_UPD_XLT_INDIRECT) &&
	    !umr_can_use_indirect_mkey(dev))
		return -EPERM;

	if (WARN_ON(!mr->umem->is_odp))
		return -EINVAL;

	/* UMR copies MTTs in units of MLX5_UMR_MTT_ALIGNMENT bytes,
	 * so we need to align the offset and length accordingly
	 */
	if (idx & page_mask) {
		npages += idx & page_mask;
		idx &= ~page_mask;
	}
	pages_to_map = ALIGN(npages, page_align);

	xlt = mlx5_ib_create_xlt_wr(mr, &wr, &sg, npages, desc_size, flags);
	if (!xlt)
		return -ENOMEM;
	pages_iter = sg.length / desc_size;
	orig_sg_length = sg.length;

	if (!(flags & MLX5_IB_UPD_XLT_INDIRECT)) {
		struct ib_umem_odp *odp = to_ib_umem_odp(mr->umem);
		size_t max_pages = ib_umem_odp_num_pages(odp) - idx;

		pages_to_map = min_t(size_t, pages_to_map, max_pages);
	}

	wr.page_shift = page_shift;

	for (pages_mapped = 0;
	     pages_mapped < pages_to_map && !err;
	     pages_mapped += pages_iter, idx += pages_iter) {
		npages = min_t(int, pages_iter, pages_to_map - pages_mapped);
		size_to_map = npages * desc_size;
		dma_sync_single_for_cpu(ddev, sg.addr, sg.length,
					DMA_TO_DEVICE);
		mlx5_odp_populate_xlt(xlt, idx, npages, mr, flags);
		dma_sync_single_for_device(ddev, sg.addr, sg.length,
					   DMA_TO_DEVICE);

		sg.length = ALIGN(size_to_map, MLX5_UMR_MTT_ALIGNMENT);

		if (pages_mapped + pages_iter >= pages_to_map)
			wr.wr.send_flags |= xlt_wr_final_send_flags(flags);

		wr.offset = idx * desc_size;
		wr.xlt_size = sg.length;

		err = mlx5_ib_post_send_wait(dev, &wr);
	}
	sg.length = orig_sg_length;
	mlx5_ib_unmap_free_xlt(dev, xlt, &sg);
	return err;
}

/*
 * Send the DMA list to the HW for a normal MR using UMR.
 * Dmabuf MR is handled in a similar way, except that the MLX5_IB_UPD_XLT_ZAP
 * flag may be used.
 */
int mlx5_ib_update_mr_pas(struct mlx5_ib_mr *mr, unsigned int flags)
{
	struct mlx5_ib_dev *dev = mr_to_mdev(mr);
	struct device *ddev = &dev->mdev->pdev->dev;
	struct ib_block_iter biter;
	struct mlx5_mtt *cur_mtt;
	struct mlx5_umr_wr wr;
	size_t orig_sg_length;
	struct mlx5_mtt *mtt;
	size_t final_size;
	struct ib_sge sg;
	int err = 0;

	if (WARN_ON(mr->umem->is_odp))
		return -EINVAL;

	mtt = mlx5_ib_create_xlt_wr(mr, &wr, &sg,
				    ib_umem_num_dma_blocks(mr->umem,
							   1 << mr->page_shift),
				    sizeof(*mtt), flags);
	if (!mtt)
		return -ENOMEM;
	orig_sg_length = sg.length;

	cur_mtt = mtt;
	rdma_for_each_block (mr->umem->sg_head.sgl, &biter, mr->umem->nmap,
			     BIT(mr->page_shift)) {
		if (cur_mtt == (void *)mtt + sg.length) {
			dma_sync_single_for_device(ddev, sg.addr, sg.length,
						   DMA_TO_DEVICE);
			err = mlx5_ib_post_send_wait(dev, &wr);
			if (err)
				goto err;
			dma_sync_single_for_cpu(ddev, sg.addr, sg.length,
						DMA_TO_DEVICE);
			wr.offset += sg.length;
			cur_mtt = mtt;
		}

		cur_mtt->ptag =
			cpu_to_be64(rdma_block_iter_dma_address(&biter) |
				    MLX5_IB_MTT_PRESENT);

		if (mr->umem->is_dmabuf && (flags & MLX5_IB_UPD_XLT_ZAP))
			cur_mtt->ptag = 0;

		cur_mtt++;
	}

	final_size = (void *)cur_mtt - (void *)mtt;
	sg.length = ALIGN(final_size, MLX5_UMR_MTT_ALIGNMENT);
	memset(cur_mtt, 0, sg.length - final_size);
	wr.wr.send_flags |= xlt_wr_final_send_flags(flags);
	wr.xlt_size = sg.length;

	dma_sync_single_for_device(ddev, sg.addr, sg.length, DMA_TO_DEVICE);
	err = mlx5_ib_post_send_wait(dev, &wr);

err:
	sg.length = orig_sg_length;
	mlx5_ib_unmap_free_xlt(dev, mtt, &sg);
	return err;
}

/*
 * Register an MR directly with a firmware command (the slow path). If
 * @populate is false the mkey is created disabled, to be populated and
 * enabled later via UMR.
 */
static struct mlx5_ib_mr *reg_create(struct ib_pd *pd, struct ib_umem *umem,
				     u64 iova, int access_flags,
				     unsigned int page_size, bool populate)
{
	struct mlx5_ib_dev *dev = to_mdev(pd->device);
	struct mlx5_ib_mr *mr;
	__be64 *pas;
	void *mkc;
	int inlen;
	u32 *in;
	int err;
	bool pg_cap = !!(MLX5_CAP_GEN(dev->mdev, pg));

	if (!page_size)
		return ERR_PTR(-EINVAL);
	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
	if (!mr)
		return ERR_PTR(-ENOMEM);

	mr->ibmr.pd = pd;
	mr->access_flags = access_flags;
	mr->page_shift = order_base_2(page_size);

	inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
	if (populate)
		inlen += sizeof(*pas) *
			 roundup(ib_umem_num_dma_blocks(umem, page_size), 2);
	in = kvzalloc(inlen, GFP_KERNEL);
	if (!in) {
		err = -ENOMEM;
		goto err_1;
	}
	pas = (__be64 *)MLX5_ADDR_OF(create_mkey_in, in, klm_pas_mtt);
	if (populate) {
		if (WARN_ON(access_flags & IB_ACCESS_ON_DEMAND)) {
			err = -EINVAL;
			goto err_2;
		}
		mlx5_ib_populate_pas(umem, 1UL << mr->page_shift, pas,
				     pg_cap ? MLX5_IB_MTT_PRESENT : 0);
	}

	/*
	 * The pg_access bit allows setting the access flags
	 * in the page list submitted with the command.
	 */
	MLX5_SET(create_mkey_in, in, pg_access, !!(pg_cap));

	mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
	set_mkc_access_pd_addr_fields(mkc, access_flags, iova,
				      populate ? pd : dev->umrc.pd);
	MLX5_SET(mkc, mkc, free, !populate);
	MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_MTT);
	MLX5_SET(mkc, mkc, umr_en, 1);

	MLX5_SET64(mkc, mkc, len, umem->length);
	MLX5_SET(mkc, mkc, bsf_octword_size, 0);
	MLX5_SET(mkc, mkc, translations_octword_size,
		 get_octo_len(iova, umem->length, mr->page_shift));
	MLX5_SET(mkc, mkc, log_page_size, mr->page_shift);
	if (populate) {
		MLX5_SET(create_mkey_in, in, translations_octword_actual_size,
			 get_octo_len(iova, umem->length, mr->page_shift));
	}

	err = mlx5_ib_create_mkey(dev, &mr->mmkey, in, inlen);
	if (err) {
		mlx5_ib_warn(dev, "create mkey failed\n");
		goto err_2;
	}
	mr->mmkey.type = MLX5_MKEY_MR;
	mr->desc_size = sizeof(struct mlx5_mtt);
	mr->umem = umem;
	set_mr_fields(dev, mr, umem->length, access_flags);
	kvfree(in);

	mlx5_ib_dbg(dev, "mkey = 0x%x\n", mr->mmkey.key);

	return mr;

err_2:
	kvfree(in);
err_1:
	kfree(mr);
	return ERR_PTR(err);
}

static struct ib_mr *mlx5_ib_get_dm_mr(struct ib_pd *pd, u64 start_addr,
				       u64 length, int acc, int mode)
{
	struct mlx5_ib_dev *dev = to_mdev(pd->device);
	int inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
	struct mlx5_ib_mr *mr;
	void *mkc;
	u32 *in;
	int err;

	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
	if (!mr)
		return ERR_PTR(-ENOMEM);

	in = kzalloc(inlen, GFP_KERNEL);
	if (!in) {
		err = -ENOMEM;
		goto err_free;
	}

	mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);

	MLX5_SET(mkc, mkc, access_mode_1_0, mode & 0x3);
	MLX5_SET(mkc, mkc, access_mode_4_2, (mode >> 2) & 0x7);
	MLX5_SET64(mkc, mkc, len, length);
	set_mkc_access_pd_addr_fields(mkc, acc, start_addr, pd);

	err = mlx5_ib_create_mkey(dev, &mr->mmkey, in, inlen);
	if (err)
		goto err_in;

	kfree(in);

	set_mr_fields(dev, mr, length, acc);

	return &mr->ibmr;

err_in:
	kfree(in);

err_free:
	kfree(mr);

	return ERR_PTR(err);
}

int mlx5_ib_advise_mr(struct ib_pd *pd,
		      enum ib_uverbs_advise_mr_advice advice,
		      u32 flags,
		      struct ib_sge *sg_list,
		      u32 num_sge,
		      struct uverbs_attr_bundle *attrs)
{
	if (advice != IB_UVERBS_ADVISE_MR_ADVICE_PREFETCH &&
	    advice != IB_UVERBS_ADVISE_MR_ADVICE_PREFETCH_WRITE &&
	    advice != IB_UVERBS_ADVISE_MR_ADVICE_PREFETCH_NO_FAULT)
		return -EOPNOTSUPP;

	return mlx5_ib_advise_mr_prefetch(pd, advice, flags,
					  sg_list, num_sge);
}

struct ib_mr *mlx5_ib_reg_dm_mr(struct ib_pd *pd, struct ib_dm *dm,
				struct ib_dm_mr_attr *attr,
				struct uverbs_attr_bundle *attrs)
{
	struct mlx5_ib_dm *mdm = to_mdm(dm);
	struct mlx5_core_dev *dev = to_mdev(dm->device)->mdev;
	u64 start_addr = mdm->dev_addr + attr->offset;
	int mode;

	switch (mdm->type) {
	case MLX5_IB_UAPI_DM_TYPE_MEMIC:
		if (attr->access_flags & ~MLX5_IB_DM_MEMIC_ALLOWED_ACCESS)
			return ERR_PTR(-EINVAL);

		mode = MLX5_MKC_ACCESS_MODE_MEMIC;
		start_addr -= pci_resource_start(dev->pdev, 0);
		break;
	case MLX5_IB_UAPI_DM_TYPE_STEERING_SW_ICM:
	case MLX5_IB_UAPI_DM_TYPE_HEADER_MODIFY_SW_ICM:
		if (attr->access_flags & ~MLX5_IB_DM_SW_ICM_ALLOWED_ACCESS)
			return ERR_PTR(-EINVAL);

		mode = MLX5_MKC_ACCESS_MODE_SW_ICM;
		break;
	default:
		return ERR_PTR(-EINVAL);
	}

	return mlx5_ib_get_dm_mr(pd, start_addr, attr->length,
				 attr->access_flags, mode);
}

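/*
 * Register a normal (non-ODP, non-dmabuf) user MR: if the PAS list can be
 * loaded with UMR the MR comes from the cache and is enabled afterwards,
 * otherwise it is created fully populated through the slow firmware path.
 */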
static struct ib_mr *create_real_mr(struct ib_pd *pd, struct ib_umem *umem,
				    u64 iova, int access_flags)
{
	struct mlx5_ib_dev *dev = to_mdev(pd->device);
	struct mlx5_ib_mr *mr = NULL;
	bool xlt_with_umr;
	int err;

	xlt_with_umr = mlx5_ib_can_load_pas_with_umr(dev, umem->length);
	if (xlt_with_umr) {
		mr = alloc_cacheable_mr(pd, umem, iova, access_flags);
	} else {
		unsigned int page_size = mlx5_umem_find_best_pgsz(
			umem, mkc, log_page_size, 0, iova);

		mutex_lock(&dev->slow_path_mutex);
		mr = reg_create(pd, umem, iova, access_flags, page_size, true);
		mutex_unlock(&dev->slow_path_mutex);
	}
	if (IS_ERR(mr)) {
		ib_umem_release(umem);
		return ERR_CAST(mr);
	}

	mlx5_ib_dbg(dev, "mkey 0x%x\n", mr->mmkey.key);

	atomic_add(ib_umem_num_pages(umem), &dev->mdev->priv.reg_pages);

	if (xlt_with_umr) {
		/*
		 * If the MR was created with reg_create then it will be
		 * configured properly but left disabled. It is safe to go ahead
		 * and configure it again via UMR while enabling it.
		 */
		err = mlx5_ib_update_mr_pas(mr, MLX5_IB_UPD_XLT_ENABLE);
		if (err) {
			dereg_mr(dev, mr);
			return ERR_PTR(err);
		}
	}
	return &mr->ibmr;
}

static struct ib_mr *create_user_odp_mr(struct ib_pd *pd, u64 start, u64 length,
					u64 iova, int access_flags,
					struct ib_udata *udata)
{
	struct mlx5_ib_dev *dev = to_mdev(pd->device);
	struct ib_umem_odp *odp;
	struct mlx5_ib_mr *mr;
	int err;

	if (!IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING))
		return ERR_PTR(-EOPNOTSUPP);

	if (!start && length == U64_MAX) {
		if (iova != 0)
			return ERR_PTR(-EINVAL);
		if (!(dev->odp_caps.general_caps & IB_ODP_SUPPORT_IMPLICIT))
			return ERR_PTR(-EINVAL);

		mr = mlx5_ib_alloc_implicit_mr(to_mpd(pd), udata, access_flags);
		if (IS_ERR(mr))
			return ERR_CAST(mr);
		return &mr->ibmr;
	}

	/* ODP requires xlt update via umr to work. */
	if (!mlx5_ib_can_load_pas_with_umr(dev, length))
		return ERR_PTR(-EINVAL);

	odp = ib_umem_odp_get(&dev->ib_dev, start, length, access_flags,
			      &mlx5_mn_ops);
	if (IS_ERR(odp))
		return ERR_CAST(odp);

	mr = alloc_cacheable_mr(pd, &odp->umem, iova, access_flags);
	if (IS_ERR(mr)) {
		ib_umem_release(&odp->umem);
		return ERR_CAST(mr);
	}

	odp->private = mr;
	err = mlx5r_store_odp_mkey(dev, &mr->mmkey);
	if (err)
		goto err_dereg_mr;

	err = mlx5_ib_init_odp_mr(mr);
	if (err)
		goto err_dereg_mr;
	return &mr->ibmr;

err_dereg_mr:
	dereg_mr(dev, mr);
	return ERR_PTR(err);
}

struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
				  u64 iova, int access_flags,
				  struct ib_udata *udata)
{
	struct mlx5_ib_dev *dev = to_mdev(pd->device);
	struct ib_umem *umem;

	if (!IS_ENABLED(CONFIG_INFINIBAND_USER_MEM))
		return ERR_PTR(-EOPNOTSUPP);

	mlx5_ib_dbg(dev, "start 0x%llx, iova 0x%llx, length 0x%llx, access_flags 0x%x\n",
		    start, iova, length, access_flags);

	if (access_flags & IB_ACCESS_ON_DEMAND)
		return create_user_odp_mr(pd, start, length, iova, access_flags,
					  udata);
	umem = ib_umem_get(&dev->ib_dev, start, length, access_flags);
	if (IS_ERR(umem))
		return ERR_CAST(umem);
	return create_real_mr(pd, umem, iova, access_flags);
}

static void mlx5_ib_dmabuf_invalidate_cb(struct dma_buf_attachment *attach)
{
	struct ib_umem_dmabuf *umem_dmabuf = attach->importer_priv;
	struct mlx5_ib_mr *mr = umem_dmabuf->private;

	dma_resv_assert_held(umem_dmabuf->attach->dmabuf->resv);

	if (!umem_dmabuf->sgt)
		return;

	mlx5_ib_update_mr_pas(mr, MLX5_IB_UPD_XLT_ZAP);
	ib_umem_dmabuf_unmap_pages(umem_dmabuf);
}

static struct dma_buf_attach_ops mlx5_ib_dmabuf_attach_ops = {
	.allow_peer2peer = 1,
	.move_notify = mlx5_ib_dmabuf_invalidate_cb,
};

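/*
 * Register a dmabuf-backed user MR. Its translation entries are loaded via
 * UMR and are zapped from the dma-buf move_notify callback above.
 */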
1608struct ib_mr *mlx5_ib_reg_user_mr_dmabuf(struct ib_pd *pd, u64 offset,
1609 u64 length, u64 virt_addr,
1610 int fd, int access_flags,
1611 struct ib_udata *udata)
1612{
1613 struct mlx5_ib_dev *dev = to_mdev(pd->device);
1614 struct mlx5_ib_mr *mr = NULL;
1615 struct ib_umem_dmabuf *umem_dmabuf;
1616 int err;
1617
1618 if (!IS_ENABLED(CONFIG_INFINIBAND_USER_MEM) ||
1619 !IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING))
1620 return ERR_PTR(-EOPNOTSUPP);
1621
1622 mlx5_ib_dbg(dev,
1623 "offset 0x%llx, virt_addr 0x%llx, length 0x%llx, fd %d, access_flags 0x%x\n",
1624 offset, virt_addr, length, fd, access_flags);
1625
1626 /* dmabuf requires xlt update via umr to work. */
1627 if (!mlx5_ib_can_load_pas_with_umr(dev, length))
1628 return ERR_PTR(-EINVAL);
1629
1630 umem_dmabuf = ib_umem_dmabuf_get(&dev->ib_dev, offset, length, fd,
1631 access_flags,
1632 &mlx5_ib_dmabuf_attach_ops);
1633 if (IS_ERR(umem_dmabuf)) {
1634 mlx5_ib_dbg(dev, "umem_dmabuf get failed (%ld)\n",
1635 PTR_ERR(umem_dmabuf));
1636 return ERR_CAST(umem_dmabuf);
1637 }
1638
1639 mr = alloc_cacheable_mr(pd, &umem_dmabuf->umem, virt_addr,
1640 access_flags);
1641 if (IS_ERR(mr)) {
1642 ib_umem_release(&umem_dmabuf->umem);
1643 return ERR_CAST(mr);
1644 }
1645
1646 mlx5_ib_dbg(dev, "mkey 0x%x\n", mr->mmkey.key);
1647
1648 atomic_add(ib_umem_num_pages(mr->umem), &dev->mdev->priv.reg_pages);
1649 umem_dmabuf->private = mr;
db72438c 1650 err = mlx5r_store_odp_mkey(dev, &mr->mmkey);
90da7dc8
JX
1651 if (err)
1652 goto err_dereg_mr;
1653
1654 err = mlx5_ib_init_dmabuf_mr(mr);
1655 if (err)
1656 goto err_dereg_mr;
1657 return &mr->ibmr;
1658
1659err_dereg_mr:
1660 dereg_mr(dev, mr);
1661 return ERR_PTR(err);
1662}
1663
09689703
JG
1664/**
1665 * mlx5_mr_cache_invalidate - Fence all DMA on the MR
1666 * @mr: The MR to fence
1667 *
1668 * Upon return the NIC will not be doing any DMA to the pages under the MR,
1669 * and any DMA inprogress will be completed. Failure of this function
1670 * indicates the HW has failed catastrophically.
1671 */
1672int mlx5_mr_cache_invalidate(struct mlx5_ib_mr *mr)
e126ba97 1673{
0025b0bd 1674 struct mlx5_umr_wr umrwr = {};
e126ba97 1675
ca991a7d 1676 if (mr_to_mdev(mr)->mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR)
89ea94a7
MG
1677 return 0;
1678
9ec4483a
YH
1679 umrwr.wr.send_flags = MLX5_IB_SEND_UMR_DISABLE_MR |
1680 MLX5_IB_SEND_UMR_UPDATE_PD_ACCESS;
7d0cc6ed 1681 umrwr.wr.opcode = MLX5_IB_WR_UMR;
ca991a7d 1682 umrwr.pd = mr_to_mdev(mr)->umrc.pd;
7d0cc6ed 1683 umrwr.mkey = mr->mmkey.key;
6a053953 1684 umrwr.ignore_free_state = 1;
e126ba97 1685
ca991a7d 1686 return mlx5_ib_post_send_wait(mr_to_mdev(mr), &umrwr);
e126ba97
EC
1687}
1688
ef3642c4
JG
1689/*
1690 * True if the change in access flags can be done via UMR, only some access
1691 * flags can be updated.
1692 */
1693static bool can_use_umr_rereg_access(struct mlx5_ib_dev *dev,
1694 unsigned int current_access_flags,
1695 unsigned int target_access_flags)
56e11d62 1696{
ef3642c4
JG
1697 unsigned int diffs = current_access_flags ^ target_access_flags;
1698
1699 if (diffs & ~(IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_WRITE |
1700 IB_ACCESS_REMOTE_READ | IB_ACCESS_RELAXED_ORDERING))
1701 return false;
1702 return mlx5_ib_can_reconfig_with_umr(dev, current_access_flags,
1703 target_access_flags);
1704}
1705
1706static int umr_rereg_pd_access(struct mlx5_ib_mr *mr, struct ib_pd *pd,
1707 int access_flags)
1708{
1709 struct mlx5_ib_dev *dev = to_mdev(mr->ibmr.device);
1710 struct mlx5_umr_wr umrwr = {
1711 .wr = {
1712 .send_flags = MLX5_IB_SEND_UMR_FAIL_IF_FREE |
1713 MLX5_IB_SEND_UMR_UPDATE_PD_ACCESS,
1714 .opcode = MLX5_IB_WR_UMR,
1715 },
1716 .mkey = mr->mmkey.key,
1717 .pd = pd,
1718 .access_flags = access_flags,
1719 };
56e11d62
NO
1720 int err;
1721
ef3642c4
JG
1722 err = mlx5_ib_post_send_wait(dev, &umrwr);
1723 if (err)
1724 return err;
56e11d62 1725
ef3642c4
JG
1726 mr->access_flags = access_flags;
1727 mr->mmkey.pd = to_mpd(pd)->pdn;
1728 return 0;
1729}
1730
1731static bool can_use_umr_rereg_pas(struct mlx5_ib_mr *mr,
1732 struct ib_umem *new_umem,
1733 int new_access_flags, u64 iova,
1734 unsigned long *page_size)
1735{
1736 struct mlx5_ib_dev *dev = to_mdev(mr->ibmr.device);
1737
1738 /* We only track the allocated sizes of MRs from the cache */
1739 if (!mr->cache_ent)
1740 return false;
1741 if (!mlx5_ib_can_load_pas_with_umr(dev, new_umem->length))
1742 return false;
1743
1744 *page_size =
1745 mlx5_umem_find_best_pgsz(new_umem, mkc, log_page_size, 0, iova);
1746 if (WARN_ON(!*page_size))
1747 return false;
1748 return (1ULL << mr->cache_ent->order) >=
1749 ib_umem_num_dma_blocks(new_umem, *page_size);
1750}
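/*
 * Worked example for the check above (illustrative numbers): a cache entry
 * of order 8 was created with 1 << 8 = 256 translation entries, so a new
 * umem that resolves to at most 256 DMA blocks at the chosen page size can
 * reuse the cached mkey; anything larger falls back to creating a new MR.
 */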
1751
1752static int umr_rereg_pas(struct mlx5_ib_mr *mr, struct ib_pd *pd,
1753 int access_flags, int flags, struct ib_umem *new_umem,
1754 u64 iova, unsigned long page_size)
1755{
1756 struct mlx5_ib_dev *dev = to_mdev(mr->ibmr.device);
1757 int upd_flags = MLX5_IB_UPD_XLT_ADDR | MLX5_IB_UPD_XLT_ENABLE;
1758 struct ib_umem *old_umem = mr->umem;
1759 int err;
1760
1761 /*
1762 * To keep everything simple, the MR is revoked before we start to mess
1763 * with it. This ensures the change is atomic relative to any use of the
1764 * MR.
1765 */
1766 err = mlx5_mr_cache_invalidate(mr);
1767 if (err)
1768 return err;
56e11d62 1769
ef3642c4
JG
1770 if (flags & IB_MR_REREG_PD) {
1771 mr->ibmr.pd = pd;
1772 mr->mmkey.pd = to_mpd(pd)->pdn;
1773 upd_flags |= MLX5_IB_UPD_XLT_PD;
1774 }
1775 if (flags & IB_MR_REREG_ACCESS) {
1776 mr->access_flags = access_flags;
1777 upd_flags |= MLX5_IB_UPD_XLT_ACCESS;
56e11d62
NO
1778 }
1779
ef3642c4
JG
1780 mr->ibmr.length = new_umem->length;
1781 mr->mmkey.iova = iova;
1782 mr->mmkey.size = new_umem->length;
1783 mr->page_shift = order_base_2(page_size);
1784 mr->umem = new_umem;
1785 err = mlx5_ib_update_mr_pas(mr, upd_flags);
1786 if (err) {
1787 /*
1788 * The MR is revoked at this point, so it is safe to free
1789 * new_umem.
1790 */
1791 mr->umem = old_umem;
1792 return err;
1793 }
56e11d62 1794
ef3642c4
JG
1795 atomic_sub(ib_umem_num_pages(old_umem), &dev->mdev->priv.reg_pages);
1796 ib_umem_release(old_umem);
1797 atomic_add(ib_umem_num_pages(new_umem), &dev->mdev->priv.reg_pages);
1798 return 0;
56e11d62
NO
1799}
1800
6e0954b1 1801struct ib_mr *mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start,
ef3642c4
JG
1802 u64 length, u64 iova, int new_access_flags,
1803 struct ib_pd *new_pd,
6e0954b1 1804 struct ib_udata *udata)
56e11d62
NO
1805{
1806 struct mlx5_ib_dev *dev = to_mdev(ib_mr->device);
1807 struct mlx5_ib_mr *mr = to_mmr(ib_mr);
56e11d62
NO
1808 int err;
1809
ef3642c4
JG
1810 if (!IS_ENABLED(CONFIG_INFINIBAND_USER_MEM))
1811 return ERR_PTR(-EOPNOTSUPP);
56e11d62 1812
ef3642c4
JG
1813 mlx5_ib_dbg(
1814 dev,
1815 "start 0x%llx, iova 0x%llx, length 0x%llx, access_flags 0x%x\n",
1816 start, iova, length, new_access_flags);
b4bd701a 1817
ef3642c4 1818 if (flags & ~(IB_MR_REREG_TRANS | IB_MR_REREG_PD | IB_MR_REREG_ACCESS))
6e0954b1 1819 return ERR_PTR(-EOPNOTSUPP);
880505cf 1820
ef3642c4
JG
1821 if (!(flags & IB_MR_REREG_ACCESS))
1822 new_access_flags = mr->access_flags;
1823 if (!(flags & IB_MR_REREG_PD))
1824 new_pd = ib_mr->pd;
b4bd701a 1825
ef3642c4
JG
1826 if (!(flags & IB_MR_REREG_TRANS)) {
1827 struct ib_umem *umem;
1828
1829 /* Fast path for PD/access change */
1830 if (can_use_umr_rereg_access(dev, mr->access_flags,
1831 new_access_flags)) {
1832 err = umr_rereg_pd_access(mr, new_pd, new_access_flags);
1833 if (err)
1834 return ERR_PTR(err);
1835 return NULL;
f0093fb1 1836 }
90da7dc8
JX
1837 /* DM or ODP MRs don't have a normal umem, so we can't re-use it */
1838 if (!mr->umem || is_odp_mr(mr) || is_dmabuf_mr(mr))
ef3642c4 1839 goto recreate;
56e11d62 1840
56e11d62 1841 /*
ef3642c4
JG
1842 * Only one active MR can refer to a umem at one time; revoke
1843 * the old MR before assigning the umem to the new one.
56e11d62 1844 */
ef3642c4 1845 err = mlx5_mr_cache_invalidate(mr);
56e11d62 1846 if (err)
ef3642c4
JG
1847 return ERR_PTR(err);
1848 umem = mr->umem;
1849 mr->umem = NULL;
1850 atomic_sub(ib_umem_num_pages(umem), &dev->mdev->priv.reg_pages);
56e11d62 1851
ef3642c4
JG
1852 return create_real_mr(new_pd, umem, mr->mmkey.iova,
1853 new_access_flags);
1854 }
7d0cc6ed 1855
ef3642c4 1856 /*
90da7dc8
JX
1857 * DM doesn't have a PAS list, so we can't re-use it; ODP/dmabuf do,
1858 * but the logic around releasing the umem is different.
ef3642c4 1859 */
90da7dc8 1860 if (!mr->umem || is_odp_mr(mr) || is_dmabuf_mr(mr))
ef3642c4
JG
1861 goto recreate;
1862
1863 if (!(new_access_flags & IB_ACCESS_ON_DEMAND) &&
1864 can_use_umr_rereg_access(dev, mr->access_flags, new_access_flags)) {
1865 struct ib_umem *new_umem;
1866 unsigned long page_size;
1867
1868 new_umem = ib_umem_get(&dev->ib_dev, start, length,
1869 new_access_flags);
1870 if (IS_ERR(new_umem))
1871 return ERR_CAST(new_umem);
1872
1873 /* Fast path for PAS change */
1874 if (can_use_umr_rereg_pas(mr, new_umem, new_access_flags, iova,
1875 &page_size)) {
1876 err = umr_rereg_pas(mr, new_pd, new_access_flags, flags,
1877 new_umem, iova, page_size);
1878 if (err) {
1879 ib_umem_release(new_umem);
1880 return ERR_PTR(err);
1881 }
1882 return NULL;
7d0cc6ed 1883 }
ef3642c4 1884 return create_real_mr(new_pd, new_umem, iova, new_access_flags);
56e11d62
NO
1885 }
1886
ef3642c4
JG
1887 /*
1888 * Everything else has no state we can preserve; just create a new MR
1889 * from scratch.
1890 */
1891recreate:
1892 return mlx5_ib_reg_user_mr(new_pd, start, length, iova,
1893 new_access_flags, udata);
56e11d62
NO
1894}
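/*
 * Illustrative summary of the flow above (not part of the original source):
 * - Without IB_MR_REREG_TRANS: a compatible access/PD change is applied in
 *   place via UMR (returning NULL signals the RDMA core that the MR was
 *   updated, not replaced); otherwise the existing umem is handed over to a
 *   freshly created MR.
 * - With IB_MR_REREG_TRANS: the new umem is loaded via UMR when it fits the
 *   cached mkey, otherwise a new MR is created around it.
 * - DM, ODP and dmabuf MRs carry no reusable state here and are always
 *   recreated from scratch.
 */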
1895
8a187ee5
SG
1896static int
1897mlx5_alloc_priv_descs(struct ib_device *device,
1898 struct mlx5_ib_mr *mr,
1899 int ndescs,
1900 int desc_size)
1901{
7ec3df17
PP
1902 struct mlx5_ib_dev *dev = to_mdev(device);
1903 struct device *ddev = &dev->mdev->pdev->dev;
8a187ee5
SG
1904 int size = ndescs * desc_size;
1905 int add_size;
1906 int ret;
1907
1908 add_size = max_t(int, MLX5_UMR_ALIGN - ARCH_KMALLOC_MINALIGN, 0);
1909
1910 mr->descs_alloc = kzalloc(size + add_size, GFP_KERNEL);
1911 if (!mr->descs_alloc)
1912 return -ENOMEM;
1913
1914 mr->descs = PTR_ALIGN(mr->descs_alloc, MLX5_UMR_ALIGN);
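/*
 * Worked example for the alignment above (illustrative, assuming
 * ARCH_KMALLOC_MINALIGN is 8 on this architecture): add_size is
 * 2048 - 8 = 2040, so even if kzalloc() returns a pointer that is only
 * 8-byte aligned, PTR_ALIGN() can advance it by up to 2040 bytes and
 * still stay inside the allocation while reaching a MLX5_UMR_ALIGN
 * (2048) byte boundary.
 */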
1915
7ec3df17
PP
1916 mr->desc_map = dma_map_single(ddev, mr->descs, size, DMA_TO_DEVICE);
1917 if (dma_mapping_error(ddev, mr->desc_map)) {
8a187ee5
SG
1918 ret = -ENOMEM;
1919 goto err;
1920 }
1921
1922 return 0;
1923err:
1924 kfree(mr->descs_alloc);
1925
1926 return ret;
1927}
1928
1929static void
1930mlx5_free_priv_descs(struct mlx5_ib_mr *mr)
1931{
f18ec422 1932 if (!mr->umem && mr->descs) {
8a187ee5
SG
1933 struct ib_device *device = mr->ibmr.device;
1934 int size = mr->max_descs * mr->desc_size;
7ec3df17 1935 struct mlx5_ib_dev *dev = to_mdev(device);
8a187ee5 1936
7ec3df17
PP
1937 dma_unmap_single(&dev->mdev->pdev->dev, mr->desc_map, size,
1938 DMA_TO_DEVICE);
8a187ee5
SG
1939 kfree(mr->descs_alloc);
1940 mr->descs = NULL;
1941 }
1942}
1943
eeea6953 1944static void clean_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
e126ba97 1945{
f18ec422 1946 if (mr->ibmr.type == IB_MR_TYPE_INTEGRITY) {
8b91ffc1
SG
1947 if (mlx5_core_destroy_psv(dev->mdev,
1948 mr->sig->psv_memory.psv_idx))
1949 mlx5_ib_warn(dev, "failed to destroy mem psv %d\n",
1950 mr->sig->psv_memory.psv_idx);
1951 if (mlx5_core_destroy_psv(dev->mdev,
1952 mr->sig->psv_wire.psv_idx))
1953 mlx5_ib_warn(dev, "failed to destroy wire psv %d\n",
1954 mr->sig->psv_wire.psv_idx);
50211ec9 1955 xa_erase(&dev->sig_mrs, mlx5_base_mkey(mr->mmkey.key));
8b91ffc1
SG
1956 kfree(mr->sig);
1957 mr->sig = NULL;
1958 }
1959
b91e1751 1960 if (!mr->cache_ent) {
eeea6953 1961 destroy_mkey(dev, mr);
b9332dad
YH
1962 mlx5_free_priv_descs(mr);
1963 }
6aec21f6
HE
1964}
1965
eeea6953 1966static void dereg_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
6aec21f6 1967{
6aec21f6
HE
1968 struct ib_umem *umem = mr->umem;
1969
09689703
JG
1970 /* Stop all DMA */
1971 if (is_odp_mr(mr))
1972 mlx5_ib_fence_odp_mr(mr);
90da7dc8
JX
1973 else if (is_dmabuf_mr(mr))
1974 mlx5_ib_fence_dmabuf_mr(mr);
09689703
JG
1975 else
1976 clean_mr(dev, mr);
8b4d5bc5 1977
1c3d247e
JG
1978 if (umem) {
1979 if (!is_odp_mr(mr))
1980 atomic_sub(ib_umem_num_pages(umem),
1981 &dev->mdev->priv.reg_pages);
1982 ib_umem_release(umem);
1983 }
1984
b91e1751 1985 if (mr->cache_ent)
09689703
JG
1986 mlx5_mr_cache_free(dev, mr);
1987 else
1988 kfree(mr);
e126ba97
EC
1989}
1990
c4367a26 1991int mlx5_ib_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata)
fbcd4983 1992{
6c984472
MG
1993 struct mlx5_ib_mr *mmr = to_mmr(ibmr);
1994
de0ae958
IR
1995 if (ibmr->type == IB_MR_TYPE_INTEGRITY) {
1996 dereg_mr(to_mdev(mmr->mtt_mr->ibmr.device), mmr->mtt_mr);
1997 dereg_mr(to_mdev(mmr->klm_mr->ibmr.device), mmr->klm_mr);
1998 }
6c984472 1999
5256edcb
JG
2000 if (is_odp_mr(mmr) && to_ib_umem_odp(mmr->umem)->is_implicit_odp) {
2001 mlx5_ib_free_implicit_mr(mmr);
2002 return 0;
2003 }
2004
6c984472
MG
2005 dereg_mr(to_mdev(ibmr->device), mmr);
2006
eeea6953 2007 return 0;
fbcd4983
IL
2008}
2009
7796d2a3
MG
2010static void mlx5_set_umr_free_mkey(struct ib_pd *pd, u32 *in, int ndescs,
2011 int access_mode, int page_shift)
2012{
2013 void *mkc;
2014
2015 mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
2016
8383da3e
JG
2017 /* This is only used from the kernel, so setting the PD is OK. */
2018 set_mkc_access_pd_addr_fields(mkc, 0, 0, pd);
7796d2a3 2019 MLX5_SET(mkc, mkc, free, 1);
7796d2a3
MG
2020 MLX5_SET(mkc, mkc, translations_octword_size, ndescs);
2021 MLX5_SET(mkc, mkc, access_mode_1_0, access_mode & 0x3);
2022 MLX5_SET(mkc, mkc, access_mode_4_2, (access_mode >> 2) & 0x7);
2023 MLX5_SET(mkc, mkc, umr_en, 1);
2024 MLX5_SET(mkc, mkc, log_page_size, page_shift);
2025}
2026
2027static int _mlx5_alloc_mkey_descs(struct ib_pd *pd, struct mlx5_ib_mr *mr,
2028 int ndescs, int desc_size, int page_shift,
2029 int access_mode, u32 *in, int inlen)
2030{
2031 struct mlx5_ib_dev *dev = to_mdev(pd->device);
2032 int err;
2033
2034 mr->access_mode = access_mode;
2035 mr->desc_size = desc_size;
2036 mr->max_descs = ndescs;
2037
2038 err = mlx5_alloc_priv_descs(pd->device, mr, ndescs, desc_size);
2039 if (err)
2040 return err;
2041
2042 mlx5_set_umr_free_mkey(pd, in, ndescs, access_mode, page_shift);
2043
fc6a9f86 2044 err = mlx5_ib_create_mkey(dev, &mr->mmkey, in, inlen);
7796d2a3
MG
2045 if (err)
2046 goto err_free_descs;
2047
2048 mr->mmkey.type = MLX5_MKEY_MR;
2049 mr->ibmr.lkey = mr->mmkey.key;
2050 mr->ibmr.rkey = mr->mmkey.key;
2051
2052 return 0;
2053
2054err_free_descs:
2055 mlx5_free_priv_descs(mr);
2056 return err;
2057}
2058
6c984472 2059static struct mlx5_ib_mr *mlx5_ib_alloc_pi_mr(struct ib_pd *pd,
de0ae958
IR
2060 u32 max_num_sg, u32 max_num_meta_sg,
2061 int desc_size, int access_mode)
3121e3c4 2062{
ec22eb53 2063 int inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
6c984472 2064 int ndescs = ALIGN(max_num_sg + max_num_meta_sg, 4);
7796d2a3 2065 int page_shift = 0;
ec22eb53 2066 struct mlx5_ib_mr *mr;
ec22eb53 2067 u32 *in;
b005d316 2068 int err;
3121e3c4
SG
2069
2070 mr = kzalloc(sizeof(*mr), GFP_KERNEL);
2071 if (!mr)
2072 return ERR_PTR(-ENOMEM);
2073
7796d2a3
MG
2074 mr->ibmr.pd = pd;
2075 mr->ibmr.device = pd->device;
2076
ec22eb53 2077 in = kzalloc(inlen, GFP_KERNEL);
3121e3c4
SG
2078 if (!in) {
2079 err = -ENOMEM;
2080 goto err_free;
2081 }
2082
de0ae958 2083 if (access_mode == MLX5_MKC_ACCESS_MODE_MTT)
7796d2a3 2084 page_shift = PAGE_SHIFT;
3121e3c4 2085
7796d2a3
MG
2086 err = _mlx5_alloc_mkey_descs(pd, mr, ndescs, desc_size, page_shift,
2087 access_mode, in, inlen);
6c984472
MG
2088 if (err)
2089 goto err_free_in;
6c984472 2090
6c984472
MG
2091 mr->umem = NULL;
2092 kfree(in);
2093
2094 return mr;
2095
6c984472
MG
2096err_free_in:
2097 kfree(in);
2098err_free:
2099 kfree(mr);
2100 return ERR_PTR(err);
2101}
2102
7796d2a3
MG
2103static int mlx5_alloc_mem_reg_descs(struct ib_pd *pd, struct mlx5_ib_mr *mr,
2104 int ndescs, u32 *in, int inlen)
2105{
2106 return _mlx5_alloc_mkey_descs(pd, mr, ndescs, sizeof(struct mlx5_mtt),
2107 PAGE_SHIFT, MLX5_MKC_ACCESS_MODE_MTT, in,
2108 inlen);
2109}
2110
2111static int mlx5_alloc_sg_gaps_descs(struct ib_pd *pd, struct mlx5_ib_mr *mr,
2112 int ndescs, u32 *in, int inlen)
2113{
2114 return _mlx5_alloc_mkey_descs(pd, mr, ndescs, sizeof(struct mlx5_klm),
2115 0, MLX5_MKC_ACCESS_MODE_KLMS, in, inlen);
2116}
2117
2118static int mlx5_alloc_integrity_descs(struct ib_pd *pd, struct mlx5_ib_mr *mr,
2119 int max_num_sg, int max_num_meta_sg,
2120 u32 *in, int inlen)
2121{
2122 struct mlx5_ib_dev *dev = to_mdev(pd->device);
2123 u32 psv_index[2];
2124 void *mkc;
2125 int err;
2126
2127 mr->sig = kzalloc(sizeof(*mr->sig), GFP_KERNEL);
2128 if (!mr->sig)
2129 return -ENOMEM;
2130
2131 /* create mem & wire PSVs */
2132 err = mlx5_core_create_psv(dev->mdev, to_mpd(pd)->pdn, 2, psv_index);
2133 if (err)
2134 goto err_free_sig;
2135
2136 mr->sig->psv_memory.psv_idx = psv_index[0];
2137 mr->sig->psv_wire.psv_idx = psv_index[1];
2138
2139 mr->sig->sig_status_checked = true;
2140 mr->sig->sig_err_exists = false;
2141 /* Next UMR, Arm SIGERR */
2142 ++mr->sig->sigerr_count;
2143 mr->klm_mr = mlx5_ib_alloc_pi_mr(pd, max_num_sg, max_num_meta_sg,
2144 sizeof(struct mlx5_klm),
2145 MLX5_MKC_ACCESS_MODE_KLMS);
2146 if (IS_ERR(mr->klm_mr)) {
2147 err = PTR_ERR(mr->klm_mr);
2148 goto err_destroy_psv;
2149 }
2150 mr->mtt_mr = mlx5_ib_alloc_pi_mr(pd, max_num_sg, max_num_meta_sg,
2151 sizeof(struct mlx5_mtt),
2152 MLX5_MKC_ACCESS_MODE_MTT);
2153 if (IS_ERR(mr->mtt_mr)) {
2154 err = PTR_ERR(mr->mtt_mr);
2155 goto err_free_klm_mr;
2156 }
2157
2158 /* Set bsf descriptors for mkey */
2159 mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
2160 MLX5_SET(mkc, mkc, bsf_en, 1);
2161 MLX5_SET(mkc, mkc, bsf_octword_size, MLX5_MKEY_BSF_OCTO_SIZE);
2162
2163 err = _mlx5_alloc_mkey_descs(pd, mr, 4, sizeof(struct mlx5_klm), 0,
2164 MLX5_MKC_ACCESS_MODE_KLMS, in, inlen);
2165 if (err)
2166 goto err_free_mtt_mr;
2167
50211ec9
JG
2168 err = xa_err(xa_store(&dev->sig_mrs, mlx5_base_mkey(mr->mmkey.key),
2169 mr->sig, GFP_KERNEL));
2170 if (err)
2171 goto err_free_descs;
7796d2a3
MG
2172 return 0;
2173
50211ec9
JG
2174err_free_descs:
2175 destroy_mkey(dev, mr);
2176 mlx5_free_priv_descs(mr);
7796d2a3
MG
2177err_free_mtt_mr:
2178 dereg_mr(to_mdev(mr->mtt_mr->ibmr.device), mr->mtt_mr);
2179 mr->mtt_mr = NULL;
2180err_free_klm_mr:
2181 dereg_mr(to_mdev(mr->klm_mr->ibmr.device), mr->klm_mr);
2182 mr->klm_mr = NULL;
2183err_destroy_psv:
2184 if (mlx5_core_destroy_psv(dev->mdev, mr->sig->psv_memory.psv_idx))
2185 mlx5_ib_warn(dev, "failed to destroy mem psv %d\n",
2186 mr->sig->psv_memory.psv_idx);
2187 if (mlx5_core_destroy_psv(dev->mdev, mr->sig->psv_wire.psv_idx))
2188 mlx5_ib_warn(dev, "failed to destroy wire psv %d\n",
2189 mr->sig->psv_wire.psv_idx);
2190err_free_sig:
2191 kfree(mr->sig);
2192
2193 return err;
2194}
2195
6c984472
MG
2196static struct ib_mr *__mlx5_ib_alloc_mr(struct ib_pd *pd,
2197 enum ib_mr_type mr_type, u32 max_num_sg,
2198 u32 max_num_meta_sg)
2199{
2200 struct mlx5_ib_dev *dev = to_mdev(pd->device);
2201 int inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
2202 int ndescs = ALIGN(max_num_sg, 4);
2203 struct mlx5_ib_mr *mr;
6c984472
MG
2204 u32 *in;
2205 int err;
2206
2207 mr = kzalloc(sizeof(*mr), GFP_KERNEL);
2208 if (!mr)
2209 return ERR_PTR(-ENOMEM);
2210
2211 in = kzalloc(inlen, GFP_KERNEL);
2212 if (!in) {
2213 err = -ENOMEM;
2214 goto err_free;
2215 }
2216
7796d2a3
MG
2217 mr->ibmr.device = pd->device;
2218 mr->umem = NULL;
3121e3c4 2219
7796d2a3
MG
2220 switch (mr_type) {
2221 case IB_MR_TYPE_MEM_REG:
2222 err = mlx5_alloc_mem_reg_descs(pd, mr, ndescs, in, inlen);
2223 break;
2224 case IB_MR_TYPE_SG_GAPS:
2225 err = mlx5_alloc_sg_gaps_descs(pd, mr, ndescs, in, inlen);
2226 break;
2227 case IB_MR_TYPE_INTEGRITY:
2228 err = mlx5_alloc_integrity_descs(pd, mr, max_num_sg,
2229 max_num_meta_sg, in, inlen);
2230 break;
2231 default:
9bee178b
SG
2232 mlx5_ib_warn(dev, "Invalid mr type %d\n", mr_type);
2233 err = -EINVAL;
3121e3c4
SG
2234 }
2235
3121e3c4 2236 if (err)
7796d2a3 2237 goto err_free_in;
3121e3c4 2238
3121e3c4
SG
2239 kfree(in);
2240
2241 return &mr->ibmr;
2242
3121e3c4
SG
2243err_free_in:
2244 kfree(in);
2245err_free:
2246 kfree(mr);
2247 return ERR_PTR(err);
2248}
2249
6c984472 2250struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type,
42a3b153 2251 u32 max_num_sg)
6c984472
MG
2252{
2253 return __mlx5_ib_alloc_mr(pd, mr_type, max_num_sg, 0);
2254}
2255
2256struct ib_mr *mlx5_ib_alloc_mr_integrity(struct ib_pd *pd,
2257 u32 max_num_sg, u32 max_num_meta_sg)
2258{
2259 return __mlx5_ib_alloc_mr(pd, IB_MR_TYPE_INTEGRITY, max_num_sg,
2260 max_num_meta_sg);
2261}
2262
d18bb3e1 2263int mlx5_ib_alloc_mw(struct ib_mw *ibmw, struct ib_udata *udata)
d2370e0a 2264{
d18bb3e1 2265 struct mlx5_ib_dev *dev = to_mdev(ibmw->device);
ec22eb53 2266 int inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
d18bb3e1 2267 struct mlx5_ib_mw *mw = to_mmw(ibmw);
ec22eb53
SM
2268 u32 *in = NULL;
2269 void *mkc;
d2370e0a
MB
2270 int ndescs;
2271 int err;
2272 struct mlx5_ib_alloc_mw req = {};
2273 struct {
2274 __u32 comp_mask;
2275 __u32 response_length;
2276 } resp = {};
2277
2278 err = ib_copy_from_udata(&req, udata, min(udata->inlen, sizeof(req)));
2279 if (err)
d18bb3e1 2280 return err;
d2370e0a
MB
2281
2282 if (req.comp_mask || req.reserved1 || req.reserved2)
d18bb3e1 2283 return -EOPNOTSUPP;
d2370e0a
MB
2284
2285 if (udata->inlen > sizeof(req) &&
2286 !ib_is_udata_cleared(udata, sizeof(req),
2287 udata->inlen - sizeof(req)))
d18bb3e1 2288 return -EOPNOTSUPP;
d2370e0a
MB
2289
2290 ndescs = req.num_klms ? roundup(req.num_klms, 4) : roundup(1, 4);
2291
ec22eb53 2292 in = kzalloc(inlen, GFP_KERNEL);
d18bb3e1 2293 if (!in) {
d2370e0a
MB
2294 err = -ENOMEM;
2295 goto free;
2296 }
2297
ec22eb53
SM
2298 mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
2299
2300 MLX5_SET(mkc, mkc, free, 1);
2301 MLX5_SET(mkc, mkc, translations_octword_size, ndescs);
d18bb3e1 2302 MLX5_SET(mkc, mkc, pd, to_mpd(ibmw->pd)->pdn);
ec22eb53
SM
2303 MLX5_SET(mkc, mkc, umr_en, 1);
2304 MLX5_SET(mkc, mkc, lr, 1);
cdbd0d2b 2305 MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_KLMS);
d18bb3e1 2306 MLX5_SET(mkc, mkc, en_rinval, !!((ibmw->type == IB_MW_TYPE_2)));
ec22eb53
SM
2307 MLX5_SET(mkc, mkc, qpn, 0xffffff);
2308
fc6a9f86 2309 err = mlx5_ib_create_mkey(dev, &mw->mmkey, in, inlen);
d2370e0a
MB
2310 if (err)
2311 goto free;
2312
aa8e08d2 2313 mw->mmkey.type = MLX5_MKEY_MW;
d18bb3e1 2314 ibmw->rkey = mw->mmkey.key;
db570d7d 2315 mw->ndescs = ndescs;
d2370e0a 2316
70c1430f
LR
2317 resp.response_length =
2318 min(offsetofend(typeof(resp), response_length), udata->outlen);
d2370e0a
MB
2319 if (resp.response_length) {
2320 err = ib_copy_to_udata(udata, &resp, resp.response_length);
d18bb3e1
LR
2321 if (err)
2322 goto free_mkey;
d2370e0a
MB
2323 }
2324
806b101b 2325 if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING)) {
db72438c 2326 err = mlx5r_store_odp_mkey(dev, &mw->mmkey);
806b101b
JG
2327 if (err)
2328 goto free_mkey;
2329 }
2330
d2370e0a 2331 kfree(in);
d18bb3e1 2332 return 0;
d2370e0a 2333
806b101b
JG
2334free_mkey:
2335 mlx5_core_destroy_mkey(dev->mdev, &mw->mmkey);
d2370e0a 2336free:
d2370e0a 2337 kfree(in);
d18bb3e1 2338 return err;
d2370e0a
MB
2339}
2340
2341int mlx5_ib_dealloc_mw(struct ib_mw *mw)
2342{
04177915 2343 struct mlx5_ib_dev *dev = to_mdev(mw->device);
d2370e0a 2344 struct mlx5_ib_mw *mmw = to_mmw(mw);
d2370e0a 2345
db72438c
YH
2346 if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING) &&
2347 xa_erase(&dev->odp_mkeys, mlx5_base_mkey(mmw->mmkey.key)))
04177915 2348 /*
db72438c
YH
2349 * pagefault_single_data_segment() may be accessing mmw
2350 * if the user bound an ODP MR to this MW.
04177915 2351 */
db72438c 2352 mlx5r_deref_wait_odp_mkey(&mmw->mmkey);
04177915 2353
d18bb3e1 2354 return mlx5_core_destroy_mkey(dev->mdev, &mmw->mmkey);
d2370e0a
MB
2355}
2356
d5436ba0
SG
2357int mlx5_ib_check_mr_status(struct ib_mr *ibmr, u32 check_mask,
2358 struct ib_mr_status *mr_status)
2359{
2360 struct mlx5_ib_mr *mmr = to_mmr(ibmr);
2361 int ret = 0;
2362
2363 if (check_mask & ~IB_MR_CHECK_SIG_STATUS) {
2364 pr_err("Invalid status check mask\n");
2365 ret = -EINVAL;
2366 goto done;
2367 }
2368
2369 mr_status->fail_status = 0;
2370 if (check_mask & IB_MR_CHECK_SIG_STATUS) {
2371 if (!mmr->sig) {
2372 ret = -EINVAL;
2373 pr_err("signature status check requested on a non-signature enabled MR\n");
2374 goto done;
2375 }
2376
2377 mmr->sig->sig_status_checked = true;
2378 if (!mmr->sig->sig_err_exists)
2379 goto done;
2380
2381 if (ibmr->lkey == mmr->sig->err_item.key)
2382 memcpy(&mr_status->sig_err, &mmr->sig->err_item,
2383 sizeof(mr_status->sig_err));
2384 else {
2385 mr_status->sig_err.err_type = IB_SIG_BAD_GUARD;
2386 mr_status->sig_err.sig_err_offset = 0;
2387 mr_status->sig_err.key = mmr->sig->err_item.key;
2388 }
2389
2390 mmr->sig->sig_err_exists = false;
2391 mr_status->fail_status |= IB_MR_CHECK_SIG_STATUS;
2392 }
2393
2394done:
2395 return ret;
2396}
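/*
 * Illustrative usage sketch (not part of the original source): an ULP that
 * submitted I/O through an IB_MR_TYPE_INTEGRITY MR would typically check the
 * signature status afterwards roughly like this:
 *
 *	struct ib_mr_status st;
 *
 *	if (!ib_check_mr_status(mr, IB_MR_CHECK_SIG_STATUS, &st) &&
 *	    (st.fail_status & IB_MR_CHECK_SIG_STATUS))
 *		handle_sig_error(&st.sig_err);	// hypothetical ULP helper
 */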
8a187ee5 2397
2563e2f3
MG
2398static int
2399mlx5_ib_map_pa_mr_sg_pi(struct ib_mr *ibmr, struct scatterlist *data_sg,
2400 int data_sg_nents, unsigned int *data_sg_offset,
2401 struct scatterlist *meta_sg, int meta_sg_nents,
2402 unsigned int *meta_sg_offset)
2403{
2404 struct mlx5_ib_mr *mr = to_mmr(ibmr);
2405 unsigned int sg_offset = 0;
2406 int n = 0;
2407
2408 mr->meta_length = 0;
2409 if (data_sg_nents == 1) {
2410 n++;
2411 mr->ndescs = 1;
2412 if (data_sg_offset)
2413 sg_offset = *data_sg_offset;
2414 mr->data_length = sg_dma_len(data_sg) - sg_offset;
2415 mr->data_iova = sg_dma_address(data_sg) + sg_offset;
2416 if (meta_sg_nents == 1) {
2417 n++;
2418 mr->meta_ndescs = 1;
2419 if (meta_sg_offset)
2420 sg_offset = *meta_sg_offset;
2421 else
2422 sg_offset = 0;
2423 mr->meta_length = sg_dma_len(meta_sg) - sg_offset;
2424 mr->pi_iova = sg_dma_address(meta_sg) + sg_offset;
2425 }
2426 ibmr->length = mr->data_length + mr->meta_length;
2427 }
2428
2429 return n;
2430}
2431
b005d316
SG
2432static int
2433mlx5_ib_sg_to_klms(struct mlx5_ib_mr *mr,
2434 struct scatterlist *sgl,
ff2ba993 2435 unsigned short sg_nents,
6c984472
MG
2436 unsigned int *sg_offset_p,
2437 struct scatterlist *meta_sgl,
2438 unsigned short meta_sg_nents,
2439 unsigned int *meta_sg_offset_p)
b005d316
SG
2440{
2441 struct scatterlist *sg = sgl;
2442 struct mlx5_klm *klms = mr->descs;
9aa8b321 2443 unsigned int sg_offset = sg_offset_p ? *sg_offset_p : 0;
b005d316 2444 u32 lkey = mr->ibmr.pd->local_dma_lkey;
6c984472 2445 int i, j = 0;
b005d316 2446
ff2ba993 2447 mr->ibmr.iova = sg_dma_address(sg) + sg_offset;
b005d316 2448 mr->ibmr.length = 0;
b005d316
SG
2449
2450 for_each_sg(sgl, sg, sg_nents, i) {
99975cd4 2451 if (unlikely(i >= mr->max_descs))
b005d316 2452 break;
ff2ba993
CH
2453 klms[i].va = cpu_to_be64(sg_dma_address(sg) + sg_offset);
2454 klms[i].bcount = cpu_to_be32(sg_dma_len(sg) - sg_offset);
b005d316 2455 klms[i].key = cpu_to_be32(lkey);
0a49f2c3 2456 mr->ibmr.length += sg_dma_len(sg) - sg_offset;
ff2ba993
CH
2457
2458 sg_offset = 0;
b005d316
SG
2459 }
2460
9aa8b321
BVA
2461 if (sg_offset_p)
2462 *sg_offset_p = sg_offset;
2463
6c984472
MG
2464 mr->ndescs = i;
2465 mr->data_length = mr->ibmr.length;
2466
2467 if (meta_sg_nents) {
2468 sg = meta_sgl;
2469 sg_offset = meta_sg_offset_p ? *meta_sg_offset_p : 0;
2470 for_each_sg(meta_sgl, sg, meta_sg_nents, j) {
2471 if (unlikely(i + j >= mr->max_descs))
2472 break;
2473 klms[i + j].va = cpu_to_be64(sg_dma_address(sg) +
2474 sg_offset);
2475 klms[i + j].bcount = cpu_to_be32(sg_dma_len(sg) -
2476 sg_offset);
2477 klms[i + j].key = cpu_to_be32(lkey);
2478 mr->ibmr.length += sg_dma_len(sg) - sg_offset;
2479
2480 sg_offset = 0;
2481 }
2482 if (meta_sg_offset_p)
2483 *meta_sg_offset_p = sg_offset;
2484
2485 mr->meta_ndescs = j;
2486 mr->meta_length = mr->ibmr.length - mr->data_length;
2487 }
2488
2489 return i + j;
b005d316
SG
2490}
2491
8a187ee5
SG
2492static int mlx5_set_page(struct ib_mr *ibmr, u64 addr)
2493{
2494 struct mlx5_ib_mr *mr = to_mmr(ibmr);
2495 __be64 *descs;
2496
2497 if (unlikely(mr->ndescs == mr->max_descs))
2498 return -ENOMEM;
2499
2500 descs = mr->descs;
2501 descs[mr->ndescs++] = cpu_to_be64(addr | MLX5_EN_RD | MLX5_EN_WR);
2502
2503 return 0;
2504}
2505
de0ae958
IR
2506static int mlx5_set_page_pi(struct ib_mr *ibmr, u64 addr)
2507{
2508 struct mlx5_ib_mr *mr = to_mmr(ibmr);
2509 __be64 *descs;
2510
2511 if (unlikely(mr->ndescs + mr->meta_ndescs == mr->max_descs))
2512 return -ENOMEM;
2513
2514 descs = mr->descs;
2515 descs[mr->ndescs + mr->meta_ndescs++] =
2516 cpu_to_be64(addr | MLX5_EN_RD | MLX5_EN_WR);
2517
2518 return 0;
2519}
2520
2521static int
2522mlx5_ib_map_mtt_mr_sg_pi(struct ib_mr *ibmr, struct scatterlist *data_sg,
6c984472
MG
2523 int data_sg_nents, unsigned int *data_sg_offset,
2524 struct scatterlist *meta_sg, int meta_sg_nents,
2525 unsigned int *meta_sg_offset)
2526{
2527 struct mlx5_ib_mr *mr = to_mmr(ibmr);
de0ae958 2528 struct mlx5_ib_mr *pi_mr = mr->mtt_mr;
6c984472
MG
2529 int n;
2530
de0ae958
IR
2531 pi_mr->ndescs = 0;
2532 pi_mr->meta_ndescs = 0;
2533 pi_mr->meta_length = 0;
2534
2535 ib_dma_sync_single_for_cpu(ibmr->device, pi_mr->desc_map,
2536 pi_mr->desc_size * pi_mr->max_descs,
2537 DMA_TO_DEVICE);
2538
2539 pi_mr->ibmr.page_size = ibmr->page_size;
2540 n = ib_sg_to_pages(&pi_mr->ibmr, data_sg, data_sg_nents, data_sg_offset,
2541 mlx5_set_page);
2542 if (n != data_sg_nents)
2543 return n;
2544
2563e2f3 2545 pi_mr->data_iova = pi_mr->ibmr.iova;
de0ae958
IR
2546 pi_mr->data_length = pi_mr->ibmr.length;
2547 pi_mr->ibmr.length = pi_mr->data_length;
2548 ibmr->length = pi_mr->data_length;
2549
2550 if (meta_sg_nents) {
2551 u64 page_mask = ~((u64)ibmr->page_size - 1);
2563e2f3 2552 u64 iova = pi_mr->data_iova;
de0ae958
IR
2553
2554 n += ib_sg_to_pages(&pi_mr->ibmr, meta_sg, meta_sg_nents,
2555 meta_sg_offset, mlx5_set_page_pi);
2556
2557 pi_mr->meta_length = pi_mr->ibmr.length;
2558 /*
2559 * PI address for the HW is the offset of the metadata address
2560 * relative to the first data page address.
2561 * It equals the first data page address + the size of the data pages +
2562 * the metadata offset within the first metadata page.
2563 */
2564 pi_mr->pi_iova = (iova & page_mask) +
2565 pi_mr->ndescs * ibmr->page_size +
2566 (pi_mr->ibmr.iova & ~page_mask);
2567 /*
2568 * In order to use one MTT MR for data and metadata, we also register
2569 * the gaps between the end of the data and the start of the metadata
2570 * (the sig MR will verify that the HW accesses the right addresses).
2571 * This mapping is safe because we use an internal mkey for the
2572 * registration.
2573 */
2574 pi_mr->ibmr.length = pi_mr->pi_iova + pi_mr->meta_length - iova;
2575 pi_mr->ibmr.iova = iova;
2576 ibmr->length += pi_mr->meta_length;
2577 }
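/*
 * Worked example for the pi_iova/length math above (illustrative numbers):
 * with page_size = 0x1000, data mapped at iova = 0x10200 across ndescs = 3
 * pages, and metadata landing at offset 0x40 into its first page with
 * meta_length = 0x100:
 *	pi_iova = 0x10000 + 3 * 0x1000 + 0x40 = 0x13040
 *	pi_mr->ibmr.length = 0x13040 + 0x100 - 0x10200 = 0x2f40
 * i.e. the MTT mapping spans the data, the gap and the metadata.
 */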
2578
2579 ib_dma_sync_single_for_device(ibmr->device, pi_mr->desc_map,
2580 pi_mr->desc_size * pi_mr->max_descs,
2581 DMA_TO_DEVICE);
2582
2583 return n;
2584}
2585
2586static int
2587mlx5_ib_map_klm_mr_sg_pi(struct ib_mr *ibmr, struct scatterlist *data_sg,
2588 int data_sg_nents, unsigned int *data_sg_offset,
2589 struct scatterlist *meta_sg, int meta_sg_nents,
2590 unsigned int *meta_sg_offset)
2591{
2592 struct mlx5_ib_mr *mr = to_mmr(ibmr);
2593 struct mlx5_ib_mr *pi_mr = mr->klm_mr;
2594 int n;
6c984472
MG
2595
2596 pi_mr->ndescs = 0;
2597 pi_mr->meta_ndescs = 0;
2598 pi_mr->meta_length = 0;
2599
2600 ib_dma_sync_single_for_cpu(ibmr->device, pi_mr->desc_map,
2601 pi_mr->desc_size * pi_mr->max_descs,
2602 DMA_TO_DEVICE);
2603
2604 n = mlx5_ib_sg_to_klms(pi_mr, data_sg, data_sg_nents, data_sg_offset,
2605 meta_sg, meta_sg_nents, meta_sg_offset);
2606
de0ae958
IR
2607 ib_dma_sync_single_for_device(ibmr->device, pi_mr->desc_map,
2608 pi_mr->desc_size * pi_mr->max_descs,
2609 DMA_TO_DEVICE);
2610
6c984472 2611 /* This is a zero-based memory region */
2563e2f3 2612 pi_mr->data_iova = 0;
6c984472 2613 pi_mr->ibmr.iova = 0;
de0ae958 2614 pi_mr->pi_iova = pi_mr->data_length;
6c984472 2615 ibmr->length = pi_mr->ibmr.length;
6c984472 2616
de0ae958
IR
2617 return n;
2618}
6c984472 2619
de0ae958
IR
2620int mlx5_ib_map_mr_sg_pi(struct ib_mr *ibmr, struct scatterlist *data_sg,
2621 int data_sg_nents, unsigned int *data_sg_offset,
2622 struct scatterlist *meta_sg, int meta_sg_nents,
2623 unsigned int *meta_sg_offset)
2624{
2625 struct mlx5_ib_mr *mr = to_mmr(ibmr);
2563e2f3 2626 struct mlx5_ib_mr *pi_mr = NULL;
de0ae958
IR
2627 int n;
2628
2629 WARN_ON(ibmr->type != IB_MR_TYPE_INTEGRITY);
2630
2563e2f3
MG
2631 mr->ndescs = 0;
2632 mr->data_length = 0;
2633 mr->data_iova = 0;
2634 mr->meta_ndescs = 0;
2635 mr->pi_iova = 0;
2636 /*
2637 * As a performance optimization, if possible, there is no need to
2638 * perform a UMR operation to register the data/metadata buffers.
2639 * First try to map the sg lists to PA descriptors with local_dma_lkey.
2640 * Fall back to UMR only in case of failure.
2641 */
2642 n = mlx5_ib_map_pa_mr_sg_pi(ibmr, data_sg, data_sg_nents,
2643 data_sg_offset, meta_sg, meta_sg_nents,
2644 meta_sg_offset);
2645 if (n == data_sg_nents + meta_sg_nents)
2646 goto out;
de0ae958
IR
2647 /*
2648 * As a performance optimization, if possible, there is no need to map
2649 * the sg lists to KLM descriptors. First try to map the sg lists to MTT
2650 * descriptors and fall back to KLM only in case of failure.
2651 * It's more efficient for the HW to work with MTT descriptors
2652 * (especially under high load).
2653 * Use KLM (indirect access) only if it's mandatory.
2654 */
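/*
 * Illustrative note (not from the original source): in practice a single
 * contiguous data SGE (plus at most one metadata SGE) is served by the PA
 * path above, page-aligned multi-entry lists are served by the MTT path,
 * and only irregular layouts that ib_sg_to_pages() cannot describe end up
 * on the KLM path.
 */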
2563e2f3 2655 pi_mr = mr->mtt_mr;
de0ae958
IR
2656 n = mlx5_ib_map_mtt_mr_sg_pi(ibmr, data_sg, data_sg_nents,
2657 data_sg_offset, meta_sg, meta_sg_nents,
2658 meta_sg_offset);
2659 if (n == data_sg_nents + meta_sg_nents)
2660 goto out;
2661
2662 pi_mr = mr->klm_mr;
2663 n = mlx5_ib_map_klm_mr_sg_pi(ibmr, data_sg, data_sg_nents,
2664 data_sg_offset, meta_sg, meta_sg_nents,
2665 meta_sg_offset);
6c984472
MG
2666 if (unlikely(n != data_sg_nents + meta_sg_nents))
2667 return -ENOMEM;
2668
de0ae958
IR
2669out:
2670 /* This is a zero-based memory region */
2671 ibmr->iova = 0;
2672 mr->pi_mr = pi_mr;
2563e2f3
MG
2673 if (pi_mr)
2674 ibmr->sig_attrs->meta_length = pi_mr->meta_length;
2675 else
2676 ibmr->sig_attrs->meta_length = mr->meta_length;
de0ae958 2677
6c984472
MG
2678 return 0;
2679}
2680
ff2ba993 2681int mlx5_ib_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents,
9aa8b321 2682 unsigned int *sg_offset)
8a187ee5
SG
2683{
2684 struct mlx5_ib_mr *mr = to_mmr(ibmr);
2685 int n;
2686
2687 mr->ndescs = 0;
2688
2689 ib_dma_sync_single_for_cpu(ibmr->device, mr->desc_map,
2690 mr->desc_size * mr->max_descs,
2691 DMA_TO_DEVICE);
2692
ec22eb53 2693 if (mr->access_mode == MLX5_MKC_ACCESS_MODE_KLMS)
6c984472
MG
2694 n = mlx5_ib_sg_to_klms(mr, sg, sg_nents, sg_offset, NULL, 0,
2695 NULL);
b005d316 2696 else
ff2ba993
CH
2697 n = ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset,
2698 mlx5_set_page);
8a187ee5
SG
2699
2700 ib_dma_sync_single_for_device(ibmr->device, mr->desc_map,
2701 mr->desc_size * mr->max_descs,
2702 DMA_TO_DEVICE);
2703
2704 return n;
2705}