RDMA/uverbs: Check ODP in ib_check_mr_access() as well
[linux-2.6-block.git] drivers/infiniband/hw/mlx5/mr.c
e126ba97 1/*
6cf0a15f 2 * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 */
32
33
34#include <linux/kref.h>
35#include <linux/random.h>
36#include <linux/debugfs.h>
37#include <linux/export.h>
746b5583 38#include <linux/delay.h>
e126ba97 39#include <rdma/ib_umem.h>
b4cfe447 40#include <rdma/ib_umem_odp.h>
968e78dd 41#include <rdma/ib_verbs.h>
42#include "mlx5_ib.h"
43
44/*
45 * We can't use an array for xlt_emergency_page because dma_map_single doesn't
46 * work on memory that belongs to kernel modules
47 */
8010d74b 48void *xlt_emergency_page;
49static DEFINE_MUTEX(xlt_emergency_page_mutex);
50
e126ba97 51enum {
746b5583 52 MAX_PENDING_REG_MR = 8,
53};
54
832a6b06 55#define MLX5_UMR_ALIGN 2048
fe45f827 56
57static void
58create_mkey_callback(int status, struct mlx5_async_work *context);
59
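/*
 * Fill the PD, start address and access-flag fields that are common to every
 * mkey context built by this driver. Relaxed ordering is only requested when
 * the device reports support for it.
 */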
60static void set_mkc_access_pd_addr_fields(void *mkc, int acc, u64 start_addr,
61 struct ib_pd *pd)
62{
63 struct mlx5_ib_dev *dev = to_mdev(pd->device);
64
65 MLX5_SET(mkc, mkc, a, !!(acc & IB_ACCESS_REMOTE_ATOMIC));
66 MLX5_SET(mkc, mkc, rw, !!(acc & IB_ACCESS_REMOTE_WRITE));
67 MLX5_SET(mkc, mkc, rr, !!(acc & IB_ACCESS_REMOTE_READ));
68 MLX5_SET(mkc, mkc, lw, !!(acc & IB_ACCESS_LOCAL_WRITE));
69 MLX5_SET(mkc, mkc, lr, 1);
70
71 if (MLX5_CAP_GEN(dev->mdev, relaxed_ordering_write))
72 MLX5_SET(mkc, mkc, relaxed_ordering_write,
73 !!(acc & IB_ACCESS_RELAXED_ORDERING));
74 if (MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read))
75 MLX5_SET(mkc, mkc, relaxed_ordering_read,
76 !!(acc & IB_ACCESS_RELAXED_ORDERING));
77
78 MLX5_SET(mkc, mkc, pd, to_mpd(pd)->pdn);
79 MLX5_SET(mkc, mkc, qpn, 0xffffff);
80 MLX5_SET64(mkc, mkc, start_addr, start_addr);
81}
82
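/*
 * The low byte of an mkey is a software-owned "variant". Stamping a fresh
 * value from dev->mkey_var into mkey_7_0 makes it unlikely that a stale key
 * still matches once the underlying mkey index has been recycled.
 */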
83static void
84assign_mkey_variant(struct mlx5_ib_dev *dev, struct mlx5_core_mkey *mkey,
85 u32 *in)
86{
f743ff3b 87 u8 key = atomic_inc_return(&dev->mkey_var);
fc6a9f86 88 void *mkc;
89
90 mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
91 MLX5_SET(mkc, mkc, mkey_7_0, key);
92 mkey->key = key;
93}
94
95static int
96mlx5_ib_create_mkey(struct mlx5_ib_dev *dev, struct mlx5_core_mkey *mkey,
97 u32 *in, int inlen)
98{
99 assign_mkey_variant(dev, mkey, in);
100 return mlx5_core_create_mkey(dev->mdev, mkey, in, inlen);
101}
102
103static int
104mlx5_ib_create_mkey_cb(struct mlx5_ib_dev *dev,
105 struct mlx5_core_mkey *mkey,
106 struct mlx5_async_ctx *async_ctx,
107 u32 *in, int inlen, u32 *out, int outlen,
108 struct mlx5_async_work *context)
109{
a3cfdd39 110 MLX5_SET(create_mkey_in, in, opcode, MLX5_CMD_OP_CREATE_MKEY);
fc6a9f86 111 assign_mkey_variant(dev, mkey, in);
112 return mlx5_cmd_exec_cb(async_ctx, in, inlen, out, outlen,
113 create_mkey_callback, context);
114}
115
116static void clean_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr);
117static void dereg_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr);
8b7ff7f3 118static int mr_cache_max_order(struct mlx5_ib_dev *dev);
1c78a21a 119static void queue_adjust_cache_locked(struct mlx5_cache_ent *ent);
120
121static bool umr_can_use_indirect_mkey(struct mlx5_ib_dev *dev)
122{
123 return !MLX5_CAP_GEN(dev->mdev, umr_indirect_mkey_disabled);
124}
125
126static int destroy_mkey(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
127{
806b101b 128 WARN_ON(xa_load(&dev->odp_mkeys, mlx5_base_mkey(mr->mmkey.key)));
b4cfe447 129
806b101b 130 return mlx5_core_destroy_mkey(dev->mdev, &mr->mmkey);
131}
132
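/*
 * Return true if the translation table of the cached mkey backing this MR is
 * large enough to cover the requested range, so the mkey can be reused via
 * UMR instead of allocating a bigger one.
 */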
133static inline bool mlx5_ib_pas_fits_in_mr(struct mlx5_ib_mr *mr, u64 start,
134 u64 length)
56e11d62 135{
136 if (!mr->cache_ent)
137 return false;
138 return ((u64)1 << mr->cache_ent->order) * MLX5_ADAPTER_PAGE_SIZE >=
139 length + (start & (MLX5_ADAPTER_PAGE_SIZE - 1));
140}
141
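/*
 * Completion handler for the asynchronous mkey creation issued by add_keys().
 * On success the new MR is added to its cache bucket; on failure fill_delay
 * is set so that the cache backs off before trying again.
 */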
fc6a9f86 142static void create_mkey_callback(int status, struct mlx5_async_work *context)
746b5583 143{
144 struct mlx5_ib_mr *mr =
145 container_of(context, struct mlx5_ib_mr, cb_work);
746b5583 146 struct mlx5_ib_dev *dev = mr->dev;
b91e1751 147 struct mlx5_cache_ent *ent = mr->cache_ent;
148 unsigned long flags;
149
150 if (status) {
151 mlx5_ib_warn(dev, "async reg mr failed. status %d\n", status);
152 kfree(mr);
153 spin_lock_irqsave(&ent->lock, flags);
154 ent->pending--;
155 WRITE_ONCE(dev->fill_delay, 1);
156 spin_unlock_irqrestore(&ent->lock, flags);
157 mod_timer(&dev->delay_timer, jiffies + HZ);
158 return;
159 }
160
aa8e08d2 161 mr->mmkey.type = MLX5_MKEY_MR;
162 mr->mmkey.key |= mlx5_idx_to_mkey(
163 MLX5_GET(create_mkey_out, mr->out, mkey_index));
746b5583 164
b9358bdb 165 WRITE_ONCE(dev->cache.last_add, jiffies);
166
167 spin_lock_irqsave(&ent->lock, flags);
168 list_add_tail(&mr->list, &ent->head);
169 ent->available_mrs++;
170 ent->total_mrs++;
171 /* If we are doing fill_to_high_water then keep going. */
172 queue_adjust_cache_locked(ent);
b9358bdb 173 ent->pending--;
746b5583 174 spin_unlock_irqrestore(&ent->lock, flags);
175}
176
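/*
 * Allocate an MR shell for a cache bucket and fill its mkey context: the mkey
 * is created in the "free" state with UMR enabled so that a real translation
 * can be attached to it later.
 */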
177static struct mlx5_ib_mr *alloc_cache_mr(struct mlx5_cache_ent *ent, void *mkc)
178{
179 struct mlx5_ib_mr *mr;
180
181 mr = kzalloc(sizeof(*mr), GFP_KERNEL);
182 if (!mr)
183 return NULL;
184 mr->cache_ent = ent;
185 mr->dev = ent->dev;
8605933a 186
5eb29f0d 187 set_mkc_access_pd_addr_fields(mkc, 0, 0, ent->dev->umrc.pd);
188 MLX5_SET(mkc, mkc, free, 1);
189 MLX5_SET(mkc, mkc, umr_en, 1);
190 MLX5_SET(mkc, mkc, access_mode_1_0, ent->access_mode & 0x3);
191 MLX5_SET(mkc, mkc, access_mode_4_2, (ent->access_mode >> 2) & 0x7);
192
193 MLX5_SET(mkc, mkc, translations_octword_size, ent->xlt);
194 MLX5_SET(mkc, mkc, log_page_size, ent->page);
195 return mr;
196}
197
aad719dc 198/* Asynchronously schedule new MRs to be populated in the cache. */
a1d8854a 199static int add_keys(struct mlx5_cache_ent *ent, unsigned int num)
e126ba97 200{
aad719dc 201 size_t inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
e126ba97 202 struct mlx5_ib_mr *mr;
203 void *mkc;
204 u32 *in;
205 int err = 0;
206 int i;
207
ec22eb53 208 in = kzalloc(inlen, GFP_KERNEL);
209 if (!in)
210 return -ENOMEM;
211
ec22eb53 212 mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
e126ba97 213 for (i = 0; i < num; i++) {
aad719dc 214 mr = alloc_cache_mr(ent, mkc);
215 if (!mr) {
216 err = -ENOMEM;
746b5583 217 break;
e126ba97 218 }
746b5583 219 spin_lock_irq(&ent->lock);
220 if (ent->pending >= MAX_PENDING_REG_MR) {
221 err = -EAGAIN;
222 spin_unlock_irq(&ent->lock);
223 kfree(mr);
224 break;
225 }
226 ent->pending++;
227 spin_unlock_irq(&ent->lock);
228 err = mlx5_ib_create_mkey_cb(ent->dev, &mr->mmkey,
229 &ent->dev->async_ctx, in, inlen,
230 mr->out, sizeof(mr->out),
231 &mr->cb_work);
e126ba97 232 if (err) {
233 spin_lock_irq(&ent->lock);
234 ent->pending--;
235 spin_unlock_irq(&ent->lock);
b91e1751 236 mlx5_ib_warn(ent->dev, "create mkey failed %d\n", err);
e126ba97 237 kfree(mr);
746b5583 238 break;
e126ba97 239 }
240 }
241
242 kfree(in);
243 return err;
244}
245
246/* Synchronously create a MR in the cache */
247static struct mlx5_ib_mr *create_cache_mr(struct mlx5_cache_ent *ent)
248{
249 size_t inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
250 struct mlx5_ib_mr *mr;
251 void *mkc;
252 u32 *in;
253 int err;
254
255 in = kzalloc(inlen, GFP_KERNEL);
256 if (!in)
257 return ERR_PTR(-ENOMEM);
258 mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
259
260 mr = alloc_cache_mr(ent, mkc);
261 if (!mr) {
262 err = -ENOMEM;
263 goto free_in;
264 }
265
266 err = mlx5_core_create_mkey(ent->dev->mdev, &mr->mmkey, in, inlen);
267 if (err)
268 goto free_mr;
269
270 mr->mmkey.type = MLX5_MKEY_MR;
271 WRITE_ONCE(ent->dev->cache.last_add, jiffies);
272 spin_lock_irq(&ent->lock);
273 ent->total_mrs++;
274 spin_unlock_irq(&ent->lock);
275 kfree(in);
276 return mr;
277free_mr:
278 kfree(mr);
279free_in:
280 kfree(in);
281 return ERR_PTR(err);
282}
283
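/*
 * Destroy one unused MR from the bucket. ent->lock is dropped around the
 * destroy command, which can sleep, and is re-taken before returning.
 */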
b9358bdb 284static void remove_cache_mr_locked(struct mlx5_cache_ent *ent)
e126ba97 285{
e126ba97 286 struct mlx5_ib_mr *mr;
e126ba97 287
b9358bdb
JG
288 lockdep_assert_held(&ent->lock);
289 if (list_empty(&ent->head))
a1d8854a 290 return;
a1d8854a
JG
291 mr = list_first_entry(&ent->head, struct mlx5_ib_mr, list);
292 list_del(&mr->list);
293 ent->available_mrs--;
294 ent->total_mrs--;
295 spin_unlock_irq(&ent->lock);
296 mlx5_core_destroy_mkey(ent->dev->mdev, &mr->mmkey);
297 kfree(mr);
b9358bdb 298 spin_lock_irq(&ent->lock);
a1d8854a 299}
65edd0e7 300
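/*
 * Grow or shrink the bucket until available_mrs + pending reaches the target.
 * With limit_fill the target is recomputed as 2 * ent->limit on every
 * iteration.
 */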
301static int resize_available_mrs(struct mlx5_cache_ent *ent, unsigned int target,
302 bool limit_fill)
303{
304 int err;
305
306 lockdep_assert_held(&ent->lock);
307
308 while (true) {
309 if (limit_fill)
310 target = ent->limit * 2;
311 if (target == ent->available_mrs + ent->pending)
312 return 0;
313 if (target > ent->available_mrs + ent->pending) {
314 u32 todo = target - (ent->available_mrs + ent->pending);
315
316 spin_unlock_irq(&ent->lock);
317 err = add_keys(ent, todo);
318 if (err == -EAGAIN)
319 usleep_range(3000, 5000);
320 spin_lock_irq(&ent->lock);
321 if (err) {
322 if (err != -EAGAIN)
323 return err;
324 } else
325 return 0;
326 } else {
b9358bdb 327 remove_cache_mr_locked(ent);
a1d8854a 328 }
e126ba97
EC
329 }
330}
331
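/*
 * debugfs "size" file (typically under
 * /sys/kernel/debug/mlx5/<device>/mr_cache/<order>/): writing sets the total
 * number of MRs owned by the bucket (in use plus cached), reading returns
 * total_mrs.
 */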
332static ssize_t size_write(struct file *filp, const char __user *buf,
333 size_t count, loff_t *pos)
334{
335 struct mlx5_cache_ent *ent = filp->private_data;
a1d8854a 336 u32 target;
e126ba97 337 int err;
e126ba97 338
a1d8854a
JG
339 err = kstrtou32_from_user(buf, count, 0, &target);
340 if (err)
341 return err;
746b5583 342
a1d8854a
JG
343 /*
344 * Target is the new value of total_mrs the user requests, however we
345 * cannot free MRs that are in use. Compute the target value for
346 * available_mrs.
347 */
348 spin_lock_irq(&ent->lock);
349 if (target < ent->total_mrs - ent->available_mrs) {
350 err = -EINVAL;
351 goto err_unlock;
e126ba97 352 }
a1d8854a
JG
353 target = target - (ent->total_mrs - ent->available_mrs);
354 if (target < ent->limit || target > ent->limit*2) {
355 err = -EINVAL;
356 goto err_unlock;
357 }
358 err = resize_available_mrs(ent, target, false);
359 if (err)
360 goto err_unlock;
361 spin_unlock_irq(&ent->lock);
e126ba97
EC
362
363 return count;
a1d8854a
JG
364
365err_unlock:
366 spin_unlock_irq(&ent->lock);
367 return err;
e126ba97
EC
368}
369
370static ssize_t size_read(struct file *filp, char __user *buf, size_t count,
371 loff_t *pos)
372{
373 struct mlx5_cache_ent *ent = filp->private_data;
374 char lbuf[20];
375 int err;
376
7c8691a3 377 err = snprintf(lbuf, sizeof(lbuf), "%d\n", ent->total_mrs);
e126ba97
EC
378 if (err < 0)
379 return err;
380
60e6627f 381 return simple_read_from_buffer(buf, count, pos, lbuf, err);
e126ba97
EC
382}
383
384static const struct file_operations size_fops = {
385 .owner = THIS_MODULE,
386 .open = simple_open,
387 .write = size_write,
388 .read = size_read,
389};
390
391static ssize_t limit_write(struct file *filp, const char __user *buf,
392 size_t count, loff_t *pos)
393{
394 struct mlx5_cache_ent *ent = filp->private_data;
e126ba97
EC
395 u32 var;
396 int err;
e126ba97 397
a1d8854a
JG
398 err = kstrtou32_from_user(buf, count, 0, &var);
399 if (err)
400 return err;
e126ba97 401
a1d8854a
JG
402 /*
403 * Upon set we immediately fill the cache to high water mark implied by
404 * the limit.
405 */
406 spin_lock_irq(&ent->lock);
e126ba97 407 ent->limit = var;
a1d8854a
JG
408 err = resize_available_mrs(ent, 0, true);
409 spin_unlock_irq(&ent->lock);
410 if (err)
411 return err;
e126ba97
EC
412 return count;
413}
414
415static ssize_t limit_read(struct file *filp, char __user *buf, size_t count,
416 loff_t *pos)
417{
418 struct mlx5_cache_ent *ent = filp->private_data;
419 char lbuf[20];
420 int err;
421
e126ba97
EC
422 err = snprintf(lbuf, sizeof(lbuf), "%d\n", ent->limit);
423 if (err < 0)
424 return err;
425
60e6627f 426 return simple_read_from_buffer(buf, count, pos, lbuf, err);
e126ba97
EC
427}
428
429static const struct file_operations limit_fops = {
430 .owner = THIS_MODULE,
431 .open = simple_open,
432 .write = limit_write,
433 .read = limit_read,
434};
435
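/*
 * Return true if any cache bucket is currently below its low water mark and
 * is therefore still being refilled.
 */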
b9358bdb 436static bool someone_adding(struct mlx5_mr_cache *cache)
e126ba97 437{
b9358bdb 438 unsigned int i;
e126ba97
EC
439
440 for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) {
b9358bdb
JG
441 struct mlx5_cache_ent *ent = &cache->ent[i];
442 bool ret;
e126ba97 443
b9358bdb
JG
444 spin_lock_irq(&ent->lock);
445 ret = ent->available_mrs < ent->limit;
446 spin_unlock_irq(&ent->lock);
447 if (ret)
448 return true;
449 }
450 return false;
e126ba97
EC
451}
452
ad2d3ef4
JG
453/*
454 * Check if the bucket is outside the high/low water mark and schedule an async
455 * update. The cache refill has hysteresis, once the low water mark is hit it is
456 * refilled up to the high mark.
457 */
458static void queue_adjust_cache_locked(struct mlx5_cache_ent *ent)
459{
460 lockdep_assert_held(&ent->lock);
461
1c78a21a 462 if (ent->disabled || READ_ONCE(ent->dev->fill_delay))
b9358bdb 463 return;
1c78a21a
JG
464 if (ent->available_mrs < ent->limit) {
465 ent->fill_to_high_water = true;
466 queue_work(ent->dev->cache.wq, &ent->work);
467 } else if (ent->fill_to_high_water &&
468 ent->available_mrs + ent->pending < 2 * ent->limit) {
469 /*
470 * Once we start populating due to hitting a low water mark
471 * continue until we pass the high water mark.
472 */
ad2d3ef4 473 queue_work(ent->dev->cache.wq, &ent->work);
1c78a21a
JG
474 } else if (ent->available_mrs == 2 * ent->limit) {
475 ent->fill_to_high_water = false;
476 } else if (ent->available_mrs > 2 * ent->limit) {
477 /* Queue deletion of excess entries */
478 ent->fill_to_high_water = false;
479 if (ent->pending)
480 queue_delayed_work(ent->dev->cache.wq, &ent->dwork,
481 msecs_to_jiffies(1000));
482 else
483 queue_work(ent->dev->cache.wq, &ent->work);
484 }
ad2d3ef4
JG
485}
486
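/*
 * Background worker for one cache bucket: adds MRs one at a time while the
 * bucket is filling towards its high water mark, and garbage-collects excess
 * MRs once available_mrs exceeds 2 * limit.
 */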
e126ba97
EC
487static void __cache_work_func(struct mlx5_cache_ent *ent)
488{
489 struct mlx5_ib_dev *dev = ent->dev;
490 struct mlx5_mr_cache *cache = &dev->cache;
746b5583 491 int err;
e126ba97 492
b9358bdb
JG
493 spin_lock_irq(&ent->lock);
494 if (ent->disabled)
495 goto out;
e126ba97 496
1c78a21a
JG
497 if (ent->fill_to_high_water &&
498 ent->available_mrs + ent->pending < 2 * ent->limit &&
b9358bdb
JG
499 !READ_ONCE(dev->fill_delay)) {
500 spin_unlock_irq(&ent->lock);
b91e1751 501 err = add_keys(ent, 1);
b9358bdb
JG
502 spin_lock_irq(&ent->lock);
503 if (ent->disabled)
504 goto out;
505 if (err) {
aad719dc
JG
506 /*
507 * EAGAIN only happens if pending is positive, so we
508 * will be rescheduled from reg_mr_callback(). The only
509 * failure path here is ENOMEM.
510 */
511 if (err != -EAGAIN) {
b9358bdb
JG
512 mlx5_ib_warn(
513 dev,
514 "command failed order %d, err %d\n",
515 ent->order, err);
746b5583
EC
516 queue_delayed_work(cache->wq, &ent->dwork,
517 msecs_to_jiffies(1000));
746b5583
EC
518 }
519 }
7c8691a3 520 } else if (ent->available_mrs > 2 * ent->limit) {
b9358bdb
JG
521 bool need_delay;
522
ab5cdc31 523 /*
a1d8854a
JG
524 * The remove_cache_mr() logic is performed as a garbage
525 * collection task. Such a task is intended to run only when no
526 * other active processes are running.
ab5cdc31
LR
527 *
528 * The need_resched() call will return TRUE if there are user tasks
529 * to be activated in the near future.
530 *
a1d8854a
JG
531 * In such a case, we don't execute remove_cache_mr() and postpone
532 * the garbage collection work to the next cycle, in
533 * order to free CPU resources to other tasks.
ab5cdc31 534 */
b9358bdb
JG
535 spin_unlock_irq(&ent->lock);
536 need_delay = need_resched() || someone_adding(cache) ||
537 time_after(jiffies,
538 READ_ONCE(cache->last_add) + 300 * HZ);
539 spin_lock_irq(&ent->lock);
540 if (ent->disabled)
541 goto out;
542 if (need_delay)
746b5583 543 queue_delayed_work(cache->wq, &ent->dwork, 300 * HZ);
b9358bdb
JG
544 remove_cache_mr_locked(ent);
545 queue_adjust_cache_locked(ent);
e126ba97 546 }
b9358bdb
JG
547out:
548 spin_unlock_irq(&ent->lock);
e126ba97
EC
549}
550
551static void delayed_cache_work_func(struct work_struct *work)
552{
553 struct mlx5_cache_ent *ent;
554
555 ent = container_of(work, struct mlx5_cache_ent, dwork.work);
556 __cache_work_func(ent);
557}
558
559static void cache_work_func(struct work_struct *work)
560{
561 struct mlx5_cache_ent *ent;
562
563 ent = container_of(work, struct mlx5_cache_ent, work);
564 __cache_work_func(ent);
565}
566
b91e1751
JG
567/* Allocate a special entry from the cache */
568struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev,
8383da3e 569 unsigned int entry, int access_flags)
49780d42
AK
570{
571 struct mlx5_mr_cache *cache = &dev->cache;
572 struct mlx5_cache_ent *ent;
573 struct mlx5_ib_mr *mr;
49780d42 574
b91e1751
JG
575 if (WARN_ON(entry <= MR_CACHE_LAST_STD_ENTRY ||
576 entry >= ARRAY_SIZE(cache->ent)))
546d3009 577 return ERR_PTR(-EINVAL);
49780d42 578
8383da3e
JG
579 /* Matches access in alloc_cache_mr() */
580 if (!mlx5_ib_can_reconfig_with_umr(dev, 0, access_flags))
581 return ERR_PTR(-EOPNOTSUPP);
582
49780d42 583 ent = &cache->ent[entry];
aad719dc
JG
584 spin_lock_irq(&ent->lock);
585 if (list_empty(&ent->head)) {
586 spin_unlock_irq(&ent->lock);
587 mr = create_cache_mr(ent);
588 if (IS_ERR(mr))
49780d42 589 return mr;
aad719dc
JG
590 } else {
591 mr = list_first_entry(&ent->head, struct mlx5_ib_mr, list);
592 list_del(&mr->list);
593 ent->available_mrs--;
594 queue_adjust_cache_locked(ent);
595 spin_unlock_irq(&ent->lock);
49780d42 596 }
8383da3e 597 mr->access_flags = access_flags;
aad719dc 598 return mr;
49780d42
AK
599}
600
aad719dc
JG
601/* Return a MR already available in the cache */
602static struct mlx5_ib_mr *get_cache_mr(struct mlx5_cache_ent *req_ent)
e126ba97 603{
b91e1751 604 struct mlx5_ib_dev *dev = req_ent->dev;
e126ba97 605 struct mlx5_ib_mr *mr = NULL;
b91e1751 606 struct mlx5_cache_ent *ent = req_ent;
e126ba97 607
b91e1751
JG
608 /* Try larger MR pools from the cache to satisfy the allocation */
609 for (; ent != &dev->cache.ent[MR_CACHE_LAST_STD_ENTRY + 1]; ent++) {
610 mlx5_ib_dbg(dev, "order %u, cache index %zu\n", ent->order,
611 ent - dev->cache.ent);
e126ba97 612
746b5583 613 spin_lock_irq(&ent->lock);
e126ba97
EC
614 if (!list_empty(&ent->head)) {
615 mr = list_first_entry(&ent->head, struct mlx5_ib_mr,
616 list);
617 list_del(&mr->list);
7c8691a3 618 ent->available_mrs--;
ad2d3ef4 619 queue_adjust_cache_locked(ent);
746b5583 620 spin_unlock_irq(&ent->lock);
e126ba97
EC
621 break;
622 }
ad2d3ef4 623 queue_adjust_cache_locked(ent);
746b5583 624 spin_unlock_irq(&ent->lock);
e126ba97
EC
625 }
626
627 if (!mr)
b91e1751 628 req_ent->miss++;
e126ba97
EC
629
630 return mr;
631}
632
1769c4c5
JG
633static void detach_mr_from_cache(struct mlx5_ib_mr *mr)
634{
635 struct mlx5_cache_ent *ent = mr->cache_ent;
636
637 mr->cache_ent = NULL;
638 spin_lock_irq(&ent->lock);
639 ent->total_mrs--;
640 spin_unlock_irq(&ent->lock);
641}
642
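/*
 * Return an MR to its cache bucket. The mkey is first invalidated with UMR;
 * if that fails it cannot be reused and is destroyed instead.
 */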
49780d42 643void mlx5_mr_cache_free(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
e126ba97 644{
b91e1751 645 struct mlx5_cache_ent *ent = mr->cache_ent;
e126ba97 646
b91e1751 647 if (!ent)
dd9a4034
VF
648 return;
649
09689703 650 if (mlx5_mr_cache_invalidate(mr)) {
1769c4c5 651 detach_mr_from_cache(mr);
afd14174 652 destroy_mkey(dev, mr);
e126ba97
EC
653 return;
654 }
49780d42 655
746b5583 656 spin_lock_irq(&ent->lock);
e126ba97 657 list_add_tail(&mr->list, &ent->head);
7c8691a3 658 ent->available_mrs++;
ad2d3ef4 659 queue_adjust_cache_locked(ent);
746b5583 660 spin_unlock_irq(&ent->lock);
e126ba97
EC
661}
662
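/*
 * Destroy every cached mkey of bucket @c. Only used while tearing down the
 * cache, after the bucket has been disabled.
 */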
663static void clean_keys(struct mlx5_ib_dev *dev, int c)
664{
e126ba97
EC
665 struct mlx5_mr_cache *cache = &dev->cache;
666 struct mlx5_cache_ent *ent = &cache->ent[c];
65edd0e7 667 struct mlx5_ib_mr *tmp_mr;
e126ba97 668 struct mlx5_ib_mr *mr;
65edd0e7 669 LIST_HEAD(del_list);
e126ba97 670
3c461911 671 cancel_delayed_work(&ent->dwork);
e126ba97 672 while (1) {
746b5583 673 spin_lock_irq(&ent->lock);
e126ba97 674 if (list_empty(&ent->head)) {
746b5583 675 spin_unlock_irq(&ent->lock);
65edd0e7 676 break;
e126ba97
EC
677 }
678 mr = list_first_entry(&ent->head, struct mlx5_ib_mr, list);
65edd0e7 679 list_move(&mr->list, &del_list);
7c8691a3
JG
680 ent->available_mrs--;
681 ent->total_mrs--;
746b5583 682 spin_unlock_irq(&ent->lock);
65edd0e7
DJ
683 mlx5_core_destroy_mkey(dev->mdev, &mr->mmkey);
684 }
685
65edd0e7
DJ
686 list_for_each_entry_safe(mr, tmp_mr, &del_list, list) {
687 list_del(&mr->list);
688 kfree(mr);
e126ba97
EC
689 }
690}
691
12cc1a02
LR
692static void mlx5_mr_cache_debugfs_cleanup(struct mlx5_ib_dev *dev)
693{
6a4d00be 694 if (!mlx5_debugfs_root || dev->is_rep)
12cc1a02
LR
695 return;
696
697 debugfs_remove_recursive(dev->cache.root);
698 dev->cache.root = NULL;
699}
700
73eb8f03 701static void mlx5_mr_cache_debugfs_init(struct mlx5_ib_dev *dev)
e126ba97
EC
702{
703 struct mlx5_mr_cache *cache = &dev->cache;
704 struct mlx5_cache_ent *ent;
73eb8f03 705 struct dentry *dir;
e126ba97
EC
706 int i;
707
6a4d00be 708 if (!mlx5_debugfs_root || dev->is_rep)
73eb8f03 709 return;
e126ba97 710
9603b61d 711 cache->root = debugfs_create_dir("mr_cache", dev->mdev->priv.dbg_root);
e126ba97
EC
712
713 for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) {
714 ent = &cache->ent[i];
715 sprintf(ent->name, "%d", ent->order);
73eb8f03
GKH
716 dir = debugfs_create_dir(ent->name, cache->root);
717 debugfs_create_file("size", 0600, dir, ent, &size_fops);
718 debugfs_create_file("limit", 0600, dir, ent, &limit_fops);
7c8691a3 719 debugfs_create_u32("cur", 0400, dir, &ent->available_mrs);
73eb8f03 720 debugfs_create_u32("miss", 0600, dir, &ent->miss);
e126ba97 721 }
e126ba97
EC
722}
723
e99e88a9 724static void delay_time_func(struct timer_list *t)
746b5583 725{
e99e88a9 726 struct mlx5_ib_dev *dev = from_timer(dev, t, delay_timer);
746b5583 727
b9358bdb 728 WRITE_ONCE(dev->fill_delay, 0);
746b5583
EC
729}
730
e126ba97
EC
731int mlx5_mr_cache_init(struct mlx5_ib_dev *dev)
732{
733 struct mlx5_mr_cache *cache = &dev->cache;
734 struct mlx5_cache_ent *ent;
e126ba97
EC
735 int i;
736
6bc1a656 737 mutex_init(&dev->slow_path_mutex);
3c856c82 738 cache->wq = alloc_ordered_workqueue("mkey_cache", WQ_MEM_RECLAIM);
e126ba97
EC
739 if (!cache->wq) {
740 mlx5_ib_warn(dev, "failed to create work queue\n");
741 return -ENOMEM;
742 }
743
e355477e 744 mlx5_cmd_init_async_ctx(dev->mdev, &dev->async_ctx);
e99e88a9 745 timer_setup(&dev->delay_timer, delay_time_func, 0);
e126ba97 746 for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) {
e126ba97
EC
747 ent = &cache->ent[i];
748 INIT_LIST_HEAD(&ent->head);
749 spin_lock_init(&ent->lock);
750 ent->order = i + 2;
751 ent->dev = dev;
49780d42 752 ent->limit = 0;
e126ba97 753
e126ba97
EC
754 INIT_WORK(&ent->work, cache_work_func);
755 INIT_DELAYED_WORK(&ent->dwork, delayed_cache_work_func);
49780d42 756
8b7ff7f3 757 if (i > MR_CACHE_LAST_STD_ENTRY) {
81713d37 758 mlx5_odp_init_mr_cache_entry(ent);
49780d42 759 continue;
81713d37 760 }
49780d42 761
8b7ff7f3 762 if (ent->order > mr_cache_max_order(dev))
49780d42
AK
763 continue;
764
765 ent->page = PAGE_SHIFT;
766 ent->xlt = (1 << ent->order) * sizeof(struct mlx5_mtt) /
767 MLX5_IB_UMR_OCTOWORD;
768 ent->access_mode = MLX5_MKC_ACCESS_MODE_MTT;
769 if ((dev->mdev->profile->mask & MLX5_PROF_MASK_MR_CACHE) &&
8383da3e
JG
770 !dev->is_rep && mlx5_core_is_pf(dev->mdev) &&
771 mlx5_ib_can_load_pas_with_umr(dev, 0))
49780d42
AK
772 ent->limit = dev->mdev->profile->mr_cache[i].limit;
773 else
774 ent->limit = 0;
ad2d3ef4
JG
775 spin_lock_irq(&ent->lock);
776 queue_adjust_cache_locked(ent);
777 spin_unlock_irq(&ent->lock);
e126ba97
EC
778 }
779
73eb8f03 780 mlx5_mr_cache_debugfs_init(dev);
12cc1a02 781
e126ba97
EC
782 return 0;
783}
784
785int mlx5_mr_cache_cleanup(struct mlx5_ib_dev *dev)
786{
b9358bdb 787 unsigned int i;
e126ba97 788
32927e28
MB
789 if (!dev->cache.wq)
790 return 0;
791
b9358bdb
JG
792 for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) {
793 struct mlx5_cache_ent *ent = &dev->cache.ent[i];
794
795 spin_lock_irq(&ent->lock);
796 ent->disabled = true;
797 spin_unlock_irq(&ent->lock);
798 cancel_work_sync(&ent->work);
799 cancel_delayed_work_sync(&ent->dwork);
800 }
e126ba97
EC
801
802 mlx5_mr_cache_debugfs_cleanup(dev);
e355477e 803 mlx5_cmd_cleanup_async_ctx(&dev->async_ctx);
e126ba97
EC
804
805 for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++)
806 clean_keys(dev, i);
807
3c461911 808 destroy_workqueue(dev->cache.wq);
746b5583 809 del_timer_sync(&dev->delay_timer);
3c461911 810
e126ba97
EC
811 return 0;
812}
813
814struct ib_mr *mlx5_ib_get_dma_mr(struct ib_pd *pd, int acc)
815{
816 struct mlx5_ib_dev *dev = to_mdev(pd->device);
ec22eb53 817 int inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
e126ba97 818 struct mlx5_ib_mr *mr;
ec22eb53
SM
819 void *mkc;
820 u32 *in;
e126ba97
EC
821 int err;
822
823 mr = kzalloc(sizeof(*mr), GFP_KERNEL);
824 if (!mr)
825 return ERR_PTR(-ENOMEM);
826
ec22eb53 827 in = kzalloc(inlen, GFP_KERNEL);
e126ba97
EC
828 if (!in) {
829 err = -ENOMEM;
830 goto err_free;
831 }
832
ec22eb53
SM
833 mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
834
cdbd0d2b 835 MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_PA);
ec22eb53 836 MLX5_SET(mkc, mkc, length64, 1);
03232cc4 837 set_mkc_access_pd_addr_fields(mkc, acc, 0, pd);
ec22eb53 838
fc6a9f86 839 err = mlx5_ib_create_mkey(dev, &mr->mmkey, in, inlen);
e126ba97
EC
840 if (err)
841 goto err_in;
842
843 kfree(in);
aa8e08d2 844 mr->mmkey.type = MLX5_MKEY_MR;
a606b0f6
MB
845 mr->ibmr.lkey = mr->mmkey.key;
846 mr->ibmr.rkey = mr->mmkey.key;
e126ba97
EC
847 mr->umem = NULL;
848
849 return &mr->ibmr;
850
851err_in:
852 kfree(in);
853
854err_free:
855 kfree(mr);
856
857 return ERR_PTR(err);
858}
859
7b4cdaae 860static int get_octo_len(u64 addr, u64 len, int page_shift)
e126ba97 861{
7b4cdaae 862 u64 page_size = 1ULL << page_shift;
e126ba97
EC
863 u64 offset;
864 int npages;
865
866 offset = addr & (page_size - 1);
7b4cdaae 867 npages = ALIGN(len + offset, page_size) >> page_shift;
e126ba97
EC
868 return (npages + 1) / 2;
869}
870
8b7ff7f3 871static int mr_cache_max_order(struct mlx5_ib_dev *dev)
e126ba97 872{
7d0cc6ed 873 if (MLX5_CAP_GEN(dev->mdev, umr_extended_translation_offset))
8b7ff7f3 874 return MR_CACHE_LAST_STD_ENTRY + 2;
4c25b7a3
MD
875 return MLX5_MAX_UMR_SHIFT;
876}
877
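/*
 * Pin or register the user memory for an MR. For IB_ACCESS_ON_DEMAND an ODP
 * umem backed by an MMU notifier is created instead of pinning the pages.
 */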
f0093fb1
JG
878static struct ib_umem *mr_umem_get(struct mlx5_ib_dev *dev, u64 start,
879 u64 length, int access_flags)
395a8e4c 880{
b4bd701a 881 struct ib_umem *u;
14ab8896 882
261dc53f
JG
883 if (access_flags & IB_ACCESS_ON_DEMAND) {
884 struct ib_umem_odp *odp;
885
c320e527 886 odp = ib_umem_odp_get(&dev->ib_dev, start, length, access_flags,
f25a546e 887 &mlx5_mn_ops);
261dc53f
JG
888 if (IS_ERR(odp)) {
889 mlx5_ib_dbg(dev, "umem get failed (%ld)\n",
890 PTR_ERR(odp));
f0093fb1 891 return ERR_CAST(odp);
261dc53f 892 }
f0093fb1 893 return &odp->umem;
395a8e4c
NO
894 }
895
f0093fb1
JG
896 u = ib_umem_get(&dev->ib_dev, start, length, access_flags);
897 if (IS_ERR(u)) {
898 mlx5_ib_dbg(dev, "umem get failed (%ld)\n", PTR_ERR(u));
899 return u;
395a8e4c 900 }
f0093fb1 901 return u;
395a8e4c
NO
902}
903
add08d76 904static void mlx5_ib_umr_done(struct ib_cq *cq, struct ib_wc *wc)
e126ba97 905{
add08d76
CH
906 struct mlx5_ib_umr_context *context =
907 container_of(wc->wr_cqe, struct mlx5_ib_umr_context, cqe);
e126ba97 908
add08d76
CH
909 context->status = wc->status;
910 complete(&context->done);
911}
e126ba97 912
add08d76
CH
913static inline void mlx5_ib_init_umr_context(struct mlx5_ib_umr_context *context)
914{
915 context->cqe.done = mlx5_ib_umr_done;
916 context->status = -1;
917 init_completion(&context->done);
e126ba97
EC
918}
919
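/*
 * Post a UMR work request on the dedicated UMR QP and sleep until it
 * completes. The umrc.sem semaphore bounds the number of UMRs in flight.
 */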
d5ea2df9
BJ
920static int mlx5_ib_post_send_wait(struct mlx5_ib_dev *dev,
921 struct mlx5_umr_wr *umrwr)
922{
923 struct umr_common *umrc = &dev->umrc;
d34ac5cd 924 const struct ib_send_wr *bad;
d5ea2df9
BJ
925 int err;
926 struct mlx5_ib_umr_context umr_context;
927
928 mlx5_ib_init_umr_context(&umr_context);
929 umrwr->wr.wr_cqe = &umr_context.cqe;
930
931 down(&umrc->sem);
932 err = ib_post_send(umrc->qp, &umrwr->wr, &bad);
933 if (err) {
934 mlx5_ib_warn(dev, "UMR post send failed, err %d\n", err);
935 } else {
936 wait_for_completion(&umr_context.done);
937 if (umr_context.status != IB_WC_SUCCESS) {
938 mlx5_ib_warn(dev, "reg umr failed (%u)\n",
939 umr_context.status);
940 err = -EFAULT;
941 }
942 }
943 up(&umrc->sem);
944 return err;
945}
946
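/*
 * Map a translation order (log2 of the number of pages) to the cache bucket
 * that serves it, or return NULL if it is too big for the standard buckets.
 */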
b91e1751
JG
947static struct mlx5_cache_ent *mr_cache_ent_from_order(struct mlx5_ib_dev *dev,
948 unsigned int order)
949{
950 struct mlx5_mr_cache *cache = &dev->cache;
951
952 if (order < cache->ent[0].order)
953 return &cache->ent[0];
954 order = order - cache->ent[0].order;
955 if (order > MR_CACHE_LAST_STD_ENTRY)
956 return NULL;
957 return &cache->ent[order];
958}
959
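/*
 * Build an MR around a cached mkey: pick the best page size for the umem,
 * find the bucket that fits the resulting number of DMA blocks and take an
 * mkey from it (creating one synchronously if the bucket is empty). The
 * translation itself is programmed later via UMR.
 */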
f0093fb1
JG
960static struct mlx5_ib_mr *alloc_mr_from_cache(struct ib_pd *pd,
961 struct ib_umem *umem, u64 iova,
962 int access_flags)
e126ba97
EC
963{
964 struct mlx5_ib_dev *dev = to_mdev(pd->device);
f0093fb1 965 struct mlx5_cache_ent *ent;
e126ba97 966 struct mlx5_ib_mr *mr;
d5c7916f 967 unsigned int page_size;
e126ba97 968
d5c7916f
JG
969 page_size = mlx5_umem_find_best_pgsz(umem, mkc, log_page_size, 0, iova);
970 if (WARN_ON(!page_size))
971 return ERR_PTR(-EINVAL);
972 ent = mr_cache_ent_from_order(
973 dev, order_base_2(ib_umem_num_dma_blocks(umem, page_size)));
b91e1751
JG
974 if (!ent)
975 return ERR_PTR(-E2BIG);
8383da3e
JG
976
977 /* Matches access in alloc_cache_mr() */
978 if (!mlx5_ib_can_reconfig_with_umr(dev, 0, access_flags))
979 return ERR_PTR(-EOPNOTSUPP);
980
aad719dc
JG
981 mr = get_cache_mr(ent);
982 if (!mr) {
983 mr = create_cache_mr(ent);
984 if (IS_ERR(mr))
985 return mr;
e126ba97
EC
986 }
987
7d0cc6ed
AK
988 mr->ibmr.pd = pd;
989 mr->umem = umem;
990 mr->access_flags = access_flags;
991 mr->desc_size = sizeof(struct mlx5_mtt);
f0093fb1
JG
992 mr->mmkey.iova = iova;
993 mr->mmkey.size = umem->length;
a606b0f6 994 mr->mmkey.pd = to_mpd(pd)->pdn;
d5c7916f 995 mr->page_shift = order_base_2(page_size);
b475598a 996
e126ba97 997 return mr;
e126ba97
EC
998}
999
7d0cc6ed
AK
1000#define MLX5_MAX_UMR_CHUNK ((1 << (MLX5_MAX_UMR_SHIFT + 4)) - \
1001 MLX5_UMR_MTT_ALIGNMENT)
1002#define MLX5_SPARE_UMR_CHUNK 0x10000
1003
8010d74b
JG
1004/*
1005 * Allocate a temporary buffer to hold the per-page information to transfer to
1006 * HW. For efficiency this should be as large as it can be, but buffer
1007 * allocation failure is not allowed, so try smaller sizes.
1008 */
1009static void *mlx5_ib_alloc_xlt(size_t *nents, size_t ent_size, gfp_t gfp_mask)
f22c30aa 1010{
8010d74b
JG
1011 const size_t xlt_chunk_align =
1012 MLX5_UMR_MTT_ALIGNMENT / sizeof(ent_size);
1013 size_t size;
1014 void *res = NULL;
1015
1016 static_assert(PAGE_SIZE % MLX5_UMR_MTT_ALIGNMENT == 0);
1017
1018 /*
1019 * MLX5_IB_UPD_XLT_ATOMIC doesn't signal an atomic context just that the
1020 * allocation can't trigger any kind of reclaim.
1021 */
1022 might_sleep();
1023
1024 gfp_mask |= __GFP_ZERO;
1025
1026 /*
1027 * If the system already has a suitable high order page then just use
1028 * that, but don't try hard to create one. This max is about 1M, so a
1029 * free x86 huge page will satisfy it.
1030 */
1031 size = min_t(size_t, ent_size * ALIGN(*nents, xlt_chunk_align),
1032 MLX5_MAX_UMR_CHUNK);
1033 *nents = size / ent_size;
1034 res = (void *)__get_free_pages(gfp_mask | __GFP_NOWARN,
1035 get_order(size));
1036 if (res)
1037 return res;
1038
1039 if (size > MLX5_SPARE_UMR_CHUNK) {
1040 size = MLX5_SPARE_UMR_CHUNK;
1041 *nents = get_order(size) / ent_size;
1042 res = (void *)__get_free_pages(gfp_mask | __GFP_NOWARN,
1043 get_order(size));
1044 if (res)
1045 return res;
1046 }
1047
1048 *nents = PAGE_SIZE / ent_size;
1049 res = (void *)__get_free_page(gfp_mask);
1050 if (res)
1051 return res;
1052
f22c30aa 1053 mutex_lock(&xlt_emergency_page_mutex);
8010d74b 1054 memset(xlt_emergency_page, 0, PAGE_SIZE);
f22c30aa
JG
1055 return xlt_emergency_page;
1056}
1057
8010d74b 1058static void mlx5_ib_free_xlt(void *xlt, size_t length)
f22c30aa 1059{
8010d74b
JG
1060 if (xlt == xlt_emergency_page) {
1061 mutex_unlock(&xlt_emergency_page_mutex);
1062 return;
1063 }
1064
1065 free_pages((unsigned long)xlt, get_order(length));
1066}
1067
1068/*
1069 * Create a MLX5_IB_SEND_UMR_UPDATE_XLT work request and XLT buffer ready for
1070 * submission.
1071 */
1072static void *mlx5_ib_create_xlt_wr(struct mlx5_ib_mr *mr,
1073 struct mlx5_umr_wr *wr, struct ib_sge *sg,
1074 size_t nents, size_t ent_size,
1075 unsigned int flags)
1076{
1077 struct mlx5_ib_dev *dev = mr->dev;
7ec3df17 1078 struct device *ddev = &dev->mdev->pdev->dev;
8010d74b
JG
1079 dma_addr_t dma;
1080 void *xlt;
1081
1082 xlt = mlx5_ib_alloc_xlt(&nents, ent_size,
1083 flags & MLX5_IB_UPD_XLT_ATOMIC ? GFP_ATOMIC :
1084 GFP_KERNEL);
1085 sg->length = nents * ent_size;
1086 dma = dma_map_single(ddev, xlt, sg->length, DMA_TO_DEVICE);
1087 if (dma_mapping_error(ddev, dma)) {
1088 mlx5_ib_err(dev, "unable to map DMA during XLT update.\n");
1089 mlx5_ib_free_xlt(xlt, sg->length);
1090 return NULL;
1091 }
1092 sg->addr = dma;
1093 sg->lkey = dev->umrc.pd->local_dma_lkey;
1094
1095 memset(wr, 0, sizeof(*wr));
1096 wr->wr.send_flags = MLX5_IB_SEND_UMR_UPDATE_XLT;
1097 if (!(flags & MLX5_IB_UPD_XLT_ENABLE))
1098 wr->wr.send_flags |= MLX5_IB_SEND_UMR_FAIL_IF_FREE;
1099 wr->wr.sg_list = sg;
1100 wr->wr.num_sge = 1;
1101 wr->wr.opcode = MLX5_IB_WR_UMR;
1102 wr->pd = mr->ibmr.pd;
1103 wr->mkey = mr->mmkey.key;
1104 wr->length = mr->mmkey.size;
1105 wr->virt_addr = mr->mmkey.iova;
1106 wr->access_flags = mr->access_flags;
1107 wr->page_shift = mr->page_shift;
1108 wr->xlt_size = sg->length;
1109 return xlt;
1110}
1111
1112static void mlx5_ib_unmap_free_xlt(struct mlx5_ib_dev *dev, void *xlt,
1113 struct ib_sge *sg)
1114{
7ec3df17 1115 struct device *ddev = &dev->mdev->pdev->dev;
8010d74b
JG
1116
1117 dma_unmap_single(ddev, sg->addr, sg->length, DMA_TO_DEVICE);
1118 mlx5_ib_free_xlt(xlt, sg->length);
f22c30aa
JG
1119}
1120
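/*
 * Translate MLX5_IB_UPD_XLT_* update flags into the UMR send flags that must
 * be set on the last work request of an XLT update.
 */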
f1eaac37
JG
1121static unsigned int xlt_wr_final_send_flags(unsigned int flags)
1122{
1123 unsigned int res = 0;
1124
1125 if (flags & MLX5_IB_UPD_XLT_ENABLE)
1126 res |= MLX5_IB_SEND_UMR_ENABLE_MR |
1127 MLX5_IB_SEND_UMR_UPDATE_PD_ACCESS |
1128 MLX5_IB_SEND_UMR_UPDATE_TRANSLATION;
1129 if (flags & MLX5_IB_UPD_XLT_PD || flags & MLX5_IB_UPD_XLT_ACCESS)
1130 res |= MLX5_IB_SEND_UMR_UPDATE_PD_ACCESS;
1131 if (flags & MLX5_IB_UPD_XLT_ADDR)
1132 res |= MLX5_IB_SEND_UMR_UPDATE_TRANSLATION;
1133 return res;
1134}
1135
7d0cc6ed
AK
1136int mlx5_ib_update_xlt(struct mlx5_ib_mr *mr, u64 idx, int npages,
1137 int page_shift, int flags)
1138{
1139 struct mlx5_ib_dev *dev = mr->dev;
7ec3df17 1140 struct device *ddev = &dev->mdev->pdev->dev;
7d0cc6ed 1141 void *xlt;
e622f2f4 1142 struct mlx5_umr_wr wr;
832a6b06
HE
1143 struct ib_sge sg;
1144 int err = 0;
81713d37
AK
1145 int desc_size = (flags & MLX5_IB_UPD_XLT_INDIRECT)
1146 ? sizeof(struct mlx5_klm)
1147 : sizeof(struct mlx5_mtt);
7d0cc6ed
AK
1148 const int page_align = MLX5_UMR_MTT_ALIGNMENT / desc_size;
1149 const int page_mask = page_align - 1;
832a6b06
HE
1150 size_t pages_mapped = 0;
1151 size_t pages_to_map = 0;
8010d74b 1152 size_t pages_iter;
cbe4b8f0 1153 size_t size_to_map = 0;
8010d74b 1154 size_t orig_sg_length;
832a6b06 1155
c8d75a98
MD
1156 if ((flags & MLX5_IB_UPD_XLT_INDIRECT) &&
1157 !umr_can_use_indirect_mkey(dev))
1158 return -EPERM;
832a6b06 1159
f1eaac37
JG
1160 if (WARN_ON(!mr->umem->is_odp))
1161 return -EINVAL;
1162
832a6b06 1163 /* UMR copies MTTs in units of MLX5_UMR_MTT_ALIGNMENT bytes,
7d0cc6ed
AK
1164 * so we need to align the offset and length accordingly
1165 */
1166 if (idx & page_mask) {
1167 npages += idx & page_mask;
1168 idx &= ~page_mask;
832a6b06 1169 }
7d0cc6ed 1170 pages_to_map = ALIGN(npages, page_align);
7d0cc6ed 1171
8010d74b
JG
1172 xlt = mlx5_ib_create_xlt_wr(mr, &wr, &sg, npages, desc_size, flags);
1173 if (!xlt)
1174 return -ENOMEM;
1175 pages_iter = sg.length / desc_size;
1176 orig_sg_length = sg.length;
832a6b06 1177
f1eaac37
JG
1178 if (!(flags & MLX5_IB_UPD_XLT_INDIRECT)) {
1179 struct ib_umem_odp *odp = to_ib_umem_odp(mr->umem);
1180 size_t max_pages = ib_umem_odp_num_pages(odp) - idx;
cbe4b8f0 1181
f1eaac37 1182 pages_to_map = min_t(size_t, pages_to_map, max_pages);
cbe4b8f0
AK
1183 }
1184
7d0cc6ed
AK
1185 wr.page_shift = page_shift;
1186
832a6b06
HE
1187 for (pages_mapped = 0;
1188 pages_mapped < pages_to_map && !err;
7d0cc6ed 1189 pages_mapped += pages_iter, idx += pages_iter) {
438b228e 1190 npages = min_t(int, pages_iter, pages_to_map - pages_mapped);
cbe4b8f0 1191 size_to_map = npages * desc_size;
8010d74b
JG
1192 dma_sync_single_for_cpu(ddev, sg.addr, sg.length,
1193 DMA_TO_DEVICE);
f1eaac37 1194 mlx5_odp_populate_xlt(xlt, idx, npages, mr, flags);
8010d74b
JG
1195 dma_sync_single_for_device(ddev, sg.addr, sg.length,
1196 DMA_TO_DEVICE);
832a6b06 1197
cbe4b8f0 1198 sg.length = ALIGN(size_to_map, MLX5_UMR_MTT_ALIGNMENT);
7d0cc6ed 1199
f1eaac37
JG
1200 if (pages_mapped + pages_iter >= pages_to_map)
1201 wr.wr.send_flags |= xlt_wr_final_send_flags(flags);
832a6b06 1202
7d0cc6ed 1203 wr.offset = idx * desc_size;
31616255 1204 wr.xlt_size = sg.length;
832a6b06 1205
d5ea2df9 1206 err = mlx5_ib_post_send_wait(dev, &wr);
832a6b06 1207 }
8010d74b
JG
1208 sg.length = orig_sg_length;
1209 mlx5_ib_unmap_free_xlt(dev, xlt, &sg);
832a6b06
HE
1210 return err;
1211}
832a6b06 1212
f1eaac37
JG
1213/*
1214 * Send the DMA list to the HW for a normal MR using UMR.
1215 */
1216static int mlx5_ib_update_mr_pas(struct mlx5_ib_mr *mr, unsigned int flags)
1217{
1218 struct mlx5_ib_dev *dev = mr->dev;
7ec3df17 1219 struct device *ddev = &dev->mdev->pdev->dev;
f1eaac37
JG
1220 struct ib_block_iter biter;
1221 struct mlx5_mtt *cur_mtt;
1222 struct mlx5_umr_wr wr;
1223 size_t orig_sg_length;
1224 struct mlx5_mtt *mtt;
1225 size_t final_size;
1226 struct ib_sge sg;
1227 int err = 0;
1228
1229 if (WARN_ON(mr->umem->is_odp))
1230 return -EINVAL;
1231
1232 mtt = mlx5_ib_create_xlt_wr(mr, &wr, &sg,
1233 ib_umem_num_dma_blocks(mr->umem,
1234 1 << mr->page_shift),
1235 sizeof(*mtt), flags);
1236 if (!mtt)
1237 return -ENOMEM;
1238 orig_sg_length = sg.length;
1239
1240 cur_mtt = mtt;
1241 rdma_for_each_block (mr->umem->sg_head.sgl, &biter, mr->umem->nmap,
1242 BIT(mr->page_shift)) {
1243 if (cur_mtt == (void *)mtt + sg.length) {
1244 dma_sync_single_for_device(ddev, sg.addr, sg.length,
1245 DMA_TO_DEVICE);
1246 err = mlx5_ib_post_send_wait(dev, &wr);
1247 if (err)
1248 goto err;
1249 dma_sync_single_for_cpu(ddev, sg.addr, sg.length,
1250 DMA_TO_DEVICE);
1251 wr.offset += sg.length;
1252 cur_mtt = mtt;
1253 }
1254
1255 cur_mtt->ptag =
1256 cpu_to_be64(rdma_block_iter_dma_address(&biter) |
1257 MLX5_IB_MTT_PRESENT);
1258 cur_mtt++;
1259 }
1260
1261 final_size = (void *)cur_mtt - (void *)mtt;
1262 sg.length = ALIGN(final_size, MLX5_UMR_MTT_ALIGNMENT);
1263 memset(cur_mtt, 0, sg.length - final_size);
1264 wr.wr.send_flags |= xlt_wr_final_send_flags(flags);
1265 wr.xlt_size = sg.length;
1266
1267 dma_sync_single_for_device(ddev, sg.addr, sg.length, DMA_TO_DEVICE);
1268 err = mlx5_ib_post_send_wait(dev, &wr);
1269
1270err:
1271 sg.length = orig_sg_length;
1272 mlx5_ib_unmap_free_xlt(dev, mtt, &sg);
1273 return err;
1274}
1275
395a8e4c
NO
1276/*
1277 * If ibmr is NULL it will be allocated by reg_create.
1278 * Else, the given ibmr will be used.
1279 */
1280static struct mlx5_ib_mr *reg_create(struct ib_mr *ibmr, struct ib_pd *pd,
f0093fb1
JG
1281 struct ib_umem *umem, u64 iova,
1282 int access_flags, bool populate)
e126ba97
EC
1283{
1284 struct mlx5_ib_dev *dev = to_mdev(pd->device);
d5c7916f 1285 unsigned int page_size;
e126ba97 1286 struct mlx5_ib_mr *mr;
ec22eb53
SM
1287 __be64 *pas;
1288 void *mkc;
e126ba97 1289 int inlen;
ec22eb53 1290 u32 *in;
e126ba97 1291 int err;
938fe83c 1292 bool pg_cap = !!(MLX5_CAP_GEN(dev->mdev, pg));
e126ba97 1293
d5c7916f
JG
1294 page_size =
1295 mlx5_umem_find_best_pgsz(umem, mkc, log_page_size, 0, iova);
1296 if (WARN_ON(!page_size))
1297 return ERR_PTR(-EINVAL);
1298
395a8e4c 1299 mr = ibmr ? to_mmr(ibmr) : kzalloc(sizeof(*mr), GFP_KERNEL);
e126ba97
EC
1300 if (!mr)
1301 return ERR_PTR(-ENOMEM);
1302
ff740aef
IL
1303 mr->ibmr.pd = pd;
1304 mr->access_flags = access_flags;
d5c7916f 1305 mr->page_shift = order_base_2(page_size);
ff740aef
IL
1306
1307 inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
1308 if (populate)
d5c7916f
JG
1309 inlen += sizeof(*pas) *
1310 roundup(ib_umem_num_dma_blocks(umem, page_size), 2);
1b9a07ee 1311 in = kvzalloc(inlen, GFP_KERNEL);
e126ba97
EC
1312 if (!in) {
1313 err = -ENOMEM;
1314 goto err_1;
1315 }
ec22eb53 1316 pas = (__be64 *)MLX5_ADDR_OF(create_mkey_in, in, klm_pas_mtt);
8383da3e
JG
1317 if (populate) {
1318 if (WARN_ON(access_flags & IB_ACCESS_ON_DEMAND)) {
1319 err = -EINVAL;
1320 goto err_2;
1321 }
d5c7916f 1322 mlx5_ib_populate_pas(umem, 1UL << mr->page_shift, pas,
c438fde1 1323 pg_cap ? MLX5_IB_MTT_PRESENT : 0);
8383da3e 1324 }
e126ba97 1325
ec22eb53 1326 /* The pg_access bit allows setting the access flags
cc149f75 1327 * in the page list submitted with the command. */
ec22eb53
SM
1328 MLX5_SET(create_mkey_in, in, pg_access, !!(pg_cap));
1329
1330 mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
f0093fb1 1331 set_mkc_access_pd_addr_fields(mkc, access_flags, iova,
5eb29f0d 1332 populate ? pd : dev->umrc.pd);
ff740aef 1333 MLX5_SET(mkc, mkc, free, !populate);
cdbd0d2b 1334 MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_MTT);
8b7ff7f3 1335 MLX5_SET(mkc, mkc, umr_en, 1);
ec22eb53 1336
f0093fb1 1337 MLX5_SET64(mkc, mkc, len, umem->length);
ec22eb53
SM
1338 MLX5_SET(mkc, mkc, bsf_octword_size, 0);
1339 MLX5_SET(mkc, mkc, translations_octword_size,
d5c7916f
JG
1340 get_octo_len(iova, umem->length, mr->page_shift));
1341 MLX5_SET(mkc, mkc, log_page_size, mr->page_shift);
ff740aef
IL
1342 if (populate) {
1343 MLX5_SET(create_mkey_in, in, translations_octword_actual_size,
d5c7916f 1344 get_octo_len(iova, umem->length, mr->page_shift));
ff740aef 1345 }
ec22eb53 1346
fc6a9f86 1347 err = mlx5_ib_create_mkey(dev, &mr->mmkey, in, inlen);
e126ba97
EC
1348 if (err) {
1349 mlx5_ib_warn(dev, "create mkey failed\n");
1350 goto err_2;
1351 }
aa8e08d2 1352 mr->mmkey.type = MLX5_MKEY_MR;
49780d42 1353 mr->desc_size = sizeof(struct mlx5_mtt);
7eae20db 1354 mr->dev = dev;
479163f4 1355 kvfree(in);
e126ba97 1356
a606b0f6 1357 mlx5_ib_dbg(dev, "mkey = 0x%x\n", mr->mmkey.key);
e126ba97
EC
1358
1359 return mr;
1360
1361err_2:
479163f4 1362 kvfree(in);
e126ba97
EC
1363
1364err_1:
395a8e4c
NO
1365 if (!ibmr)
1366 kfree(mr);
e126ba97
EC
1367
1368 return ERR_PTR(err);
1369}
1370
ac2f7e62 1371static void set_mr_fields(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr,
fc332570 1372 u64 length, int access_flags)
395a8e4c 1373{
a606b0f6
MB
1374 mr->ibmr.lkey = mr->mmkey.key;
1375 mr->ibmr.rkey = mr->mmkey.key;
395a8e4c 1376 mr->ibmr.length = length;
56e11d62 1377 mr->access_flags = access_flags;
395a8e4c
NO
1378}
1379
3b113a1e
AL
1380static struct ib_mr *mlx5_ib_get_dm_mr(struct ib_pd *pd, u64 start_addr,
1381 u64 length, int acc, int mode)
6c29f57e
AL
1382{
1383 struct mlx5_ib_dev *dev = to_mdev(pd->device);
1384 int inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
6c29f57e
AL
1385 struct mlx5_ib_mr *mr;
1386 void *mkc;
1387 u32 *in;
1388 int err;
1389
1390 mr = kzalloc(sizeof(*mr), GFP_KERNEL);
1391 if (!mr)
1392 return ERR_PTR(-ENOMEM);
1393
1394 in = kzalloc(inlen, GFP_KERNEL);
1395 if (!in) {
1396 err = -ENOMEM;
1397 goto err_free;
1398 }
1399
1400 mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
1401
3b113a1e
AL
1402 MLX5_SET(mkc, mkc, access_mode_1_0, mode & 0x3);
1403 MLX5_SET(mkc, mkc, access_mode_4_2, (mode >> 2) & 0x7);
6c29f57e 1404 MLX5_SET64(mkc, mkc, len, length);
03232cc4 1405 set_mkc_access_pd_addr_fields(mkc, acc, start_addr, pd);
6c29f57e 1406
fc6a9f86 1407 err = mlx5_ib_create_mkey(dev, &mr->mmkey, in, inlen);
6c29f57e
AL
1408 if (err)
1409 goto err_in;
1410
1411 kfree(in);
1412
fc332570 1413 set_mr_fields(dev, mr, length, acc);
6c29f57e
AL
1414
1415 return &mr->ibmr;
1416
1417err_in:
1418 kfree(in);
1419
1420err_free:
1421 kfree(mr);
1422
1423 return ERR_PTR(err);
1424}
1425
813e90b1
MS
1426int mlx5_ib_advise_mr(struct ib_pd *pd,
1427 enum ib_uverbs_advise_mr_advice advice,
1428 u32 flags,
1429 struct ib_sge *sg_list,
1430 u32 num_sge,
1431 struct uverbs_attr_bundle *attrs)
1432{
1433 if (advice != IB_UVERBS_ADVISE_MR_ADVICE_PREFETCH &&
677cf51f
YH
1434 advice != IB_UVERBS_ADVISE_MR_ADVICE_PREFETCH_WRITE &&
1435 advice != IB_UVERBS_ADVISE_MR_ADVICE_PREFETCH_NO_FAULT)
813e90b1
MS
1436 return -EOPNOTSUPP;
1437
1438 return mlx5_ib_advise_mr_prefetch(pd, advice, flags,
1439 sg_list, num_sge);
1440}
1441
6c29f57e
AL
1442struct ib_mr *mlx5_ib_reg_dm_mr(struct ib_pd *pd, struct ib_dm *dm,
1443 struct ib_dm_mr_attr *attr,
1444 struct uverbs_attr_bundle *attrs)
1445{
1446 struct mlx5_ib_dm *mdm = to_mdm(dm);
3b113a1e
AL
1447 struct mlx5_core_dev *dev = to_mdev(dm->device)->mdev;
1448 u64 start_addr = mdm->dev_addr + attr->offset;
1449 int mode;
1450
1451 switch (mdm->type) {
1452 case MLX5_IB_UAPI_DM_TYPE_MEMIC:
1453 if (attr->access_flags & ~MLX5_IB_DM_MEMIC_ALLOWED_ACCESS)
1454 return ERR_PTR(-EINVAL);
1455
1456 mode = MLX5_MKC_ACCESS_MODE_MEMIC;
1457 start_addr -= pci_resource_start(dev->pdev, 0);
1458 break;
25c13324
AL
1459 case MLX5_IB_UAPI_DM_TYPE_STEERING_SW_ICM:
1460 case MLX5_IB_UAPI_DM_TYPE_HEADER_MODIFY_SW_ICM:
1461 if (attr->access_flags & ~MLX5_IB_DM_SW_ICM_ALLOWED_ACCESS)
1462 return ERR_PTR(-EINVAL);
1463
1464 mode = MLX5_MKC_ACCESS_MODE_SW_ICM;
1465 break;
3b113a1e 1466 default:
6c29f57e 1467 return ERR_PTR(-EINVAL);
3b113a1e 1468 }
6c29f57e 1469
3b113a1e
AL
1470 return mlx5_ib_get_dm_mr(pd, start_addr, attr->length,
1471 attr->access_flags, mode);
6c29f57e
AL
1472}
1473
e126ba97
EC
1474struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
1475 u64 virt_addr, int access_flags,
1476 struct ib_udata *udata)
1477{
1478 struct mlx5_ib_dev *dev = to_mdev(pd->device);
1479 struct mlx5_ib_mr *mr = NULL;
8383da3e 1480 bool xlt_with_umr;
e126ba97 1481 struct ib_umem *umem;
e126ba97
EC
1482 int err;
1483
1b19b951 1484 if (!IS_ENABLED(CONFIG_INFINIBAND_USER_MEM))
ea30f013 1485 return ERR_PTR(-EOPNOTSUPP);
1b19b951 1486
900a6d79
EC
1487 mlx5_ib_dbg(dev, "start 0x%llx, virt_addr 0x%llx, length 0x%llx, access_flags 0x%x\n",
1488 start, virt_addr, length, access_flags);
81713d37 1489
8383da3e
JG
1490 xlt_with_umr = mlx5_ib_can_load_pas_with_umr(dev, length);
1491 /* ODP requires xlt update via umr to work. */
1492 if (!xlt_with_umr && (access_flags & IB_ACCESS_ON_DEMAND))
1493 return ERR_PTR(-EINVAL);
1494
13859d5d
LR
1495 if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING) && !start &&
1496 length == U64_MAX) {
8ffc3248
JG
1497 if (virt_addr != start)
1498 return ERR_PTR(-EINVAL);
81713d37
AK
1499 if (!(access_flags & IB_ACCESS_ON_DEMAND) ||
1500 !(dev->odp_caps.general_caps & IB_ODP_SUPPORT_IMPLICIT))
1501 return ERR_PTR(-EINVAL);
1502
b0ea0fa5 1503 mr = mlx5_ib_alloc_implicit_mr(to_mpd(pd), udata, access_flags);
4289861d
LR
1504 if (IS_ERR(mr))
1505 return ERR_CAST(mr);
81713d37
AK
1506 return &mr->ibmr;
1507 }
81713d37 1508
f0093fb1
JG
1509 umem = mr_umem_get(dev, start, length, access_flags);
1510 if (IS_ERR(umem))
1511 return ERR_CAST(umem);
e126ba97 1512
8383da3e 1513 if (xlt_with_umr) {
f0093fb1 1514 mr = alloc_mr_from_cache(pd, umem, virt_addr, access_flags);
2e4e706e 1515 if (IS_ERR(mr))
e126ba97 1516 mr = NULL;
e126ba97
EC
1517 }
1518
6bc1a656
ML
1519 if (!mr) {
1520 mutex_lock(&dev->slow_path_mutex);
f0093fb1
JG
1521 mr = reg_create(NULL, pd, umem, virt_addr, access_flags,
1522 !xlt_with_umr);
6bc1a656
ML
1523 mutex_unlock(&dev->slow_path_mutex);
1524 }
e126ba97
EC
1525
1526 if (IS_ERR(mr)) {
1527 err = PTR_ERR(mr);
1528 goto error;
1529 }
1530
a606b0f6 1531 mlx5_ib_dbg(dev, "mkey 0x%x\n", mr->mmkey.key);
e126ba97
EC
1532
1533 mr->umem = umem;
1c3d247e 1534 atomic_add(ib_umem_num_pages(mr->umem), &dev->mdev->priv.reg_pages);
fc332570 1535 set_mr_fields(dev, mr, length, access_flags);
e126ba97 1536
a03bfc37 1537 if (xlt_with_umr && !(access_flags & IB_ACCESS_ON_DEMAND)) {
8383da3e
JG
1538 /*
1539 * If the MR was created with reg_create then it will be
1540 * configured properly but left disabled. It is safe to go ahead
1541 * and configure it again via UMR while enabling it.
1542 */
f1eaac37 1543 err = mlx5_ib_update_mr_pas(mr, MLX5_IB_UPD_XLT_ENABLE);
ff740aef 1544 if (err) {
fbcd4983 1545 dereg_mr(dev, mr);
ff740aef
IL
1546 return ERR_PTR(err);
1547 }
1548 }
1549
aa603815
JG
1550 if (is_odp_mr(mr)) {
1551 to_ib_umem_odp(mr->umem)->private = mr;
189277f3 1552 init_waitqueue_head(&mr->q_deferred_work);
5256edcb 1553 atomic_set(&mr->num_deferred_work, 0);
806b101b
JG
1554 err = xa_err(xa_store(&dev->odp_mkeys,
1555 mlx5_base_mkey(mr->mmkey.key), &mr->mmkey,
1556 GFP_KERNEL));
1557 if (err) {
1558 dereg_mr(dev, mr);
1559 return ERR_PTR(err);
1560 }
a03bfc37
YH
1561
1562 err = mlx5_ib_init_odp_mr(mr, xlt_with_umr);
1563 if (err) {
1564 dereg_mr(dev, mr);
1565 return ERR_PTR(err);
1566 }
a6bc3875 1567 }
13859d5d 1568
ff740aef 1569 return &mr->ibmr;
e126ba97
EC
1570error:
1571 ib_umem_release(umem);
1572 return ERR_PTR(err);
1573}
1574
09689703
JG
1575/**
1576 * mlx5_mr_cache_invalidate - Fence all DMA on the MR
1577 * @mr: The MR to fence
1578 *
1579 * Upon return the NIC will not be doing any DMA to the pages under the MR,
1580 * and any DMA in progress will be completed. Failure of this function
1581 * indicates the HW has failed catastrophically.
1582 */
1583int mlx5_mr_cache_invalidate(struct mlx5_ib_mr *mr)
e126ba97 1584{
0025b0bd 1585 struct mlx5_umr_wr umrwr = {};
e126ba97 1586
09689703 1587 if (mr->dev->mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR)
89ea94a7
MG
1588 return 0;
1589
9ec4483a
YH
1590 umrwr.wr.send_flags = MLX5_IB_SEND_UMR_DISABLE_MR |
1591 MLX5_IB_SEND_UMR_UPDATE_PD_ACCESS;
7d0cc6ed 1592 umrwr.wr.opcode = MLX5_IB_WR_UMR;
09689703 1593 umrwr.pd = mr->dev->umrc.pd;
7d0cc6ed 1594 umrwr.mkey = mr->mmkey.key;
6a053953 1595 umrwr.ignore_free_state = 1;
e126ba97 1596
09689703 1597 return mlx5_ib_post_send_wait(mr->dev, &umrwr);
e126ba97
EC
1598}
1599
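/*
 * Use a UMR to rewrite only the PD and/or access flags of an existing mkey,
 * leaving its translation untouched.
 */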
7d0cc6ed 1600static int rereg_umr(struct ib_pd *pd, struct mlx5_ib_mr *mr,
56e11d62
NO
1601 int access_flags, int flags)
1602{
1603 struct mlx5_ib_dev *dev = to_mdev(pd->device);
56e11d62 1604 struct mlx5_umr_wr umrwr = {};
56e11d62
NO
1605 int err;
1606
56e11d62
NO
1607 umrwr.wr.send_flags = MLX5_IB_SEND_UMR_FAIL_IF_FREE;
1608
7d0cc6ed
AK
1609 umrwr.wr.opcode = MLX5_IB_WR_UMR;
1610 umrwr.mkey = mr->mmkey.key;
56e11d62 1611
31616255 1612 if (flags & IB_MR_REREG_PD || flags & IB_MR_REREG_ACCESS) {
56e11d62 1613 umrwr.pd = pd;
56e11d62 1614 umrwr.access_flags = access_flags;
31616255 1615 umrwr.wr.send_flags |= MLX5_IB_SEND_UMR_UPDATE_PD_ACCESS;
56e11d62
NO
1616 }
1617
d5ea2df9 1618 err = mlx5_ib_post_send_wait(dev, &umrwr);
56e11d62 1619
56e11d62
NO
1620 return err;
1621}
1622
1623int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start,
1624 u64 length, u64 virt_addr, int new_access_flags,
1625 struct ib_pd *new_pd, struct ib_udata *udata)
1626{
1627 struct mlx5_ib_dev *dev = to_mdev(ib_mr->device);
1628 struct mlx5_ib_mr *mr = to_mmr(ib_mr);
1629 struct ib_pd *pd = (flags & IB_MR_REREG_PD) ? new_pd : ib_mr->pd;
1630 int access_flags = flags & IB_MR_REREG_ACCESS ?
1631 new_access_flags :
1632 mr->access_flags;
7d0cc6ed 1633 int upd_flags = 0;
b4bd701a 1634 u64 addr, len;
56e11d62
NO
1635 int err;
1636
1637 mlx5_ib_dbg(dev, "start 0x%llx, virt_addr 0x%llx, length 0x%llx, access_flags 0x%x\n",
1638 start, virt_addr, length, access_flags);
1639
b4bd701a
LR
1640 if (!mr->umem)
1641 return -EINVAL;
1642
880505cf
JG
1643 if (is_odp_mr(mr))
1644 return -EOPNOTSUPP;
1645
b4bd701a
LR
1646 if (flags & IB_MR_REREG_TRANS) {
1647 addr = virt_addr;
1648 len = length;
1649 } else {
1650 addr = mr->umem->address;
1651 len = mr->umem->length;
1652 }
1653
56e11d62
NO
1654 if (flags != IB_MR_REREG_PD) {
1655 /*
1656 * Replace umem. This needs to be done whether or not UMR is
1657 * used.
1658 */
1659 flags |= IB_MR_REREG_TRANS;
1c3d247e
JG
1660 atomic_sub(ib_umem_num_pages(mr->umem),
1661 &dev->mdev->priv.reg_pages);
56e11d62 1662 ib_umem_release(mr->umem);
f0093fb1
JG
1663 mr->umem = mr_umem_get(dev, addr, len, access_flags);
1664 if (IS_ERR(mr->umem)) {
1665 err = PTR_ERR(mr->umem);
1666 mr->umem = NULL;
4638a3b2 1667 goto err;
f0093fb1 1668 }
1c3d247e
JG
1669 atomic_add(ib_umem_num_pages(mr->umem),
1670 &dev->mdev->priv.reg_pages);
56e11d62
NO
1671 }
1672
8383da3e
JG
1673 if (!mlx5_ib_can_reconfig_with_umr(dev, mr->access_flags,
1674 access_flags) ||
1675 !mlx5_ib_can_load_pas_with_umr(dev, len) ||
1676 (flags & IB_MR_REREG_TRANS &&
1677 !mlx5_ib_pas_fits_in_mr(mr, addr, len))) {
56e11d62
NO
1678 /*
1679 * UMR can't be used - MKey needs to be replaced.
1680 */
b91e1751 1681 if (mr->cache_ent)
1769c4c5
JG
1682 detach_mr_from_cache(mr);
1683 err = destroy_mkey(dev, mr);
56e11d62 1684 if (err)
4638a3b2 1685 goto err;
56e11d62 1686
f0093fb1 1687 mr = reg_create(ib_mr, pd, mr->umem, addr, access_flags, true);
4638a3b2
LR
1688 if (IS_ERR(mr)) {
1689 err = PTR_ERR(mr);
1690 mr = to_mmr(ib_mr);
1691 goto err;
1692 }
56e11d62
NO
1693 } else {
1694 /*
1695 * Send a UMR WQE
1696 */
7d0cc6ed
AK
1697 mr->ibmr.pd = pd;
1698 mr->access_flags = access_flags;
1699 mr->mmkey.iova = addr;
1700 mr->mmkey.size = len;
1701 mr->mmkey.pd = to_mpd(pd)->pdn;
1702
1703 if (flags & IB_MR_REREG_TRANS) {
1704 upd_flags = MLX5_IB_UPD_XLT_ADDR;
1705 if (flags & IB_MR_REREG_PD)
1706 upd_flags |= MLX5_IB_UPD_XLT_PD;
1707 if (flags & IB_MR_REREG_ACCESS)
1708 upd_flags |= MLX5_IB_UPD_XLT_ACCESS;
f1eaac37 1709 err = mlx5_ib_update_mr_pas(mr, upd_flags);
7d0cc6ed
AK
1710 } else {
1711 err = rereg_umr(pd, mr, access_flags, flags);
1712 }
1713
4638a3b2
LR
1714 if (err)
1715 goto err;
56e11d62
NO
1716 }
1717
fc332570 1718 set_mr_fields(dev, mr, len, access_flags);
56e11d62 1719
56e11d62 1720 return 0;
4638a3b2
LR
1721
1722err:
836a0fbb
LR
1723 ib_umem_release(mr->umem);
1724 mr->umem = NULL;
1725
4638a3b2
LR
1726 clean_mr(dev, mr);
1727 return err;
56e11d62
NO
1728}
1729
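/*
 * Allocate and DMA-map the descriptor list (MTTs or KLMs) used by a kernel
 * MR. The allocation is over-sized so the descriptors can be aligned to
 * MLX5_UMR_ALIGN.
 */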
8a187ee5
SG
1730static int
1731mlx5_alloc_priv_descs(struct ib_device *device,
1732 struct mlx5_ib_mr *mr,
1733 int ndescs,
1734 int desc_size)
1735{
7ec3df17
PP
1736 struct mlx5_ib_dev *dev = to_mdev(device);
1737 struct device *ddev = &dev->mdev->pdev->dev;
8a187ee5
SG
1738 int size = ndescs * desc_size;
1739 int add_size;
1740 int ret;
1741
1742 add_size = max_t(int, MLX5_UMR_ALIGN - ARCH_KMALLOC_MINALIGN, 0);
1743
1744 mr->descs_alloc = kzalloc(size + add_size, GFP_KERNEL);
1745 if (!mr->descs_alloc)
1746 return -ENOMEM;
1747
1748 mr->descs = PTR_ALIGN(mr->descs_alloc, MLX5_UMR_ALIGN);
1749
7ec3df17
PP
1750 mr->desc_map = dma_map_single(ddev, mr->descs, size, DMA_TO_DEVICE);
1751 if (dma_mapping_error(ddev, mr->desc_map)) {
8a187ee5
SG
1752 ret = -ENOMEM;
1753 goto err;
1754 }
1755
1756 return 0;
1757err:
1758 kfree(mr->descs_alloc);
1759
1760 return ret;
1761}

static void
mlx5_free_priv_descs(struct mlx5_ib_mr *mr)
{
	if (mr->descs) {
		struct ib_device *device = mr->ibmr.device;
		int size = mr->max_descs * mr->desc_size;
		struct mlx5_ib_dev *dev = to_mdev(device);

		dma_unmap_single(&dev->mdev->pdev->dev, mr->desc_map, size,
				 DMA_TO_DEVICE);
		kfree(mr->descs_alloc);
		mr->descs = NULL;
	}
}

static void clean_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
{
	if (mr->sig) {
		if (mlx5_core_destroy_psv(dev->mdev,
					  mr->sig->psv_memory.psv_idx))
			mlx5_ib_warn(dev, "failed to destroy mem psv %d\n",
				     mr->sig->psv_memory.psv_idx);
		if (mlx5_core_destroy_psv(dev->mdev,
					  mr->sig->psv_wire.psv_idx))
			mlx5_ib_warn(dev, "failed to destroy wire psv %d\n",
				     mr->sig->psv_wire.psv_idx);
		xa_erase(&dev->sig_mrs, mlx5_base_mkey(mr->mmkey.key));
		kfree(mr->sig);
		mr->sig = NULL;
	}

	if (!mr->cache_ent) {
		destroy_mkey(dev, mr);
		mlx5_free_priv_descs(mr);
	}
}

static void dereg_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
{
	struct ib_umem *umem = mr->umem;

	/* Stop all DMA */
	if (is_odp_mr(mr))
		mlx5_ib_fence_odp_mr(mr);
	else
		clean_mr(dev, mr);

	if (umem) {
		if (!is_odp_mr(mr))
			atomic_sub(ib_umem_num_pages(umem),
				   &dev->mdev->priv.reg_pages);
		ib_umem_release(umem);
	}

	if (mr->cache_ent)
		mlx5_mr_cache_free(dev, mr);
	else
		kfree(mr);
}

int mlx5_ib_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata)
{
	struct mlx5_ib_mr *mmr = to_mmr(ibmr);

	if (ibmr->type == IB_MR_TYPE_INTEGRITY) {
		dereg_mr(to_mdev(mmr->mtt_mr->ibmr.device), mmr->mtt_mr);
		dereg_mr(to_mdev(mmr->klm_mr->ibmr.device), mmr->klm_mr);
	}

	if (is_odp_mr(mmr) && to_ib_umem_odp(mmr->umem)->is_implicit_odp) {
		mlx5_ib_free_implicit_mr(mmr);
		return 0;
	}

	dereg_mr(to_mdev(ibmr->device), mmr);

	return 0;
}

static void mlx5_set_umr_free_mkey(struct ib_pd *pd, u32 *in, int ndescs,
				   int access_mode, int page_shift)
{
	void *mkc;

	mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);

	/* This is only used from the kernel, so setting the PD is OK. */
	set_mkc_access_pd_addr_fields(mkc, 0, 0, pd);
	MLX5_SET(mkc, mkc, free, 1);
	MLX5_SET(mkc, mkc, translations_octword_size, ndescs);
	MLX5_SET(mkc, mkc, access_mode_1_0, access_mode & 0x3);
	MLX5_SET(mkc, mkc, access_mode_4_2, (access_mode >> 2) & 0x7);
	MLX5_SET(mkc, mkc, umr_en, 1);
	MLX5_SET(mkc, mkc, log_page_size, page_shift);
}

static int _mlx5_alloc_mkey_descs(struct ib_pd *pd, struct mlx5_ib_mr *mr,
				  int ndescs, int desc_size, int page_shift,
				  int access_mode, u32 *in, int inlen)
{
	struct mlx5_ib_dev *dev = to_mdev(pd->device);
	int err;

	mr->access_mode = access_mode;
	mr->desc_size = desc_size;
	mr->max_descs = ndescs;

	err = mlx5_alloc_priv_descs(pd->device, mr, ndescs, desc_size);
	if (err)
		return err;

	mlx5_set_umr_free_mkey(pd, in, ndescs, access_mode, page_shift);

	err = mlx5_ib_create_mkey(dev, &mr->mmkey, in, inlen);
	if (err)
		goto err_free_descs;

	mr->mmkey.type = MLX5_MKEY_MR;
	mr->ibmr.lkey = mr->mmkey.key;
	mr->ibmr.rkey = mr->mmkey.key;

	return 0;

err_free_descs:
	mlx5_free_priv_descs(mr);
	return err;
}

static struct mlx5_ib_mr *mlx5_ib_alloc_pi_mr(struct ib_pd *pd,
				u32 max_num_sg, u32 max_num_meta_sg,
				int desc_size, int access_mode)
{
	int inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
	int ndescs = ALIGN(max_num_sg + max_num_meta_sg, 4);
	int page_shift = 0;
	struct mlx5_ib_mr *mr;
	u32 *in;
	int err;

	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
	if (!mr)
		return ERR_PTR(-ENOMEM);

	mr->ibmr.pd = pd;
	mr->ibmr.device = pd->device;

	in = kzalloc(inlen, GFP_KERNEL);
	if (!in) {
		err = -ENOMEM;
		goto err_free;
	}

	if (access_mode == MLX5_MKC_ACCESS_MODE_MTT)
		page_shift = PAGE_SHIFT;

	err = _mlx5_alloc_mkey_descs(pd, mr, ndescs, desc_size, page_shift,
				     access_mode, in, inlen);
	if (err)
		goto err_free_in;

	mr->umem = NULL;
	kfree(in);

	return mr;

err_free_in:
	kfree(in);
err_free:
	kfree(mr);
	return ERR_PTR(err);
}

static int mlx5_alloc_mem_reg_descs(struct ib_pd *pd, struct mlx5_ib_mr *mr,
				    int ndescs, u32 *in, int inlen)
{
	return _mlx5_alloc_mkey_descs(pd, mr, ndescs, sizeof(struct mlx5_mtt),
				      PAGE_SHIFT, MLX5_MKC_ACCESS_MODE_MTT, in,
				      inlen);
}

static int mlx5_alloc_sg_gaps_descs(struct ib_pd *pd, struct mlx5_ib_mr *mr,
				    int ndescs, u32 *in, int inlen)
{
	return _mlx5_alloc_mkey_descs(pd, mr, ndescs, sizeof(struct mlx5_klm),
				      0, MLX5_MKC_ACCESS_MODE_KLMS, in, inlen);
}

static int mlx5_alloc_integrity_descs(struct ib_pd *pd, struct mlx5_ib_mr *mr,
				      int max_num_sg, int max_num_meta_sg,
				      u32 *in, int inlen)
{
	struct mlx5_ib_dev *dev = to_mdev(pd->device);
	u32 psv_index[2];
	void *mkc;
	int err;

	mr->sig = kzalloc(sizeof(*mr->sig), GFP_KERNEL);
	if (!mr->sig)
		return -ENOMEM;

	/* create mem & wire PSVs */
	err = mlx5_core_create_psv(dev->mdev, to_mpd(pd)->pdn, 2, psv_index);
	if (err)
		goto err_free_sig;

	mr->sig->psv_memory.psv_idx = psv_index[0];
	mr->sig->psv_wire.psv_idx = psv_index[1];

	mr->sig->sig_status_checked = true;
	mr->sig->sig_err_exists = false;
	/* Next UMR, Arm SIGERR */
	++mr->sig->sigerr_count;
	mr->klm_mr = mlx5_ib_alloc_pi_mr(pd, max_num_sg, max_num_meta_sg,
					 sizeof(struct mlx5_klm),
					 MLX5_MKC_ACCESS_MODE_KLMS);
	if (IS_ERR(mr->klm_mr)) {
		err = PTR_ERR(mr->klm_mr);
		goto err_destroy_psv;
	}
	mr->mtt_mr = mlx5_ib_alloc_pi_mr(pd, max_num_sg, max_num_meta_sg,
					 sizeof(struct mlx5_mtt),
					 MLX5_MKC_ACCESS_MODE_MTT);
	if (IS_ERR(mr->mtt_mr)) {
		err = PTR_ERR(mr->mtt_mr);
		goto err_free_klm_mr;
	}

	/* Set bsf descriptors for mkey */
	mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
	MLX5_SET(mkc, mkc, bsf_en, 1);
	MLX5_SET(mkc, mkc, bsf_octword_size, MLX5_MKEY_BSF_OCTO_SIZE);

	err = _mlx5_alloc_mkey_descs(pd, mr, 4, sizeof(struct mlx5_klm), 0,
				     MLX5_MKC_ACCESS_MODE_KLMS, in, inlen);
	if (err)
		goto err_free_mtt_mr;

	err = xa_err(xa_store(&dev->sig_mrs, mlx5_base_mkey(mr->mmkey.key),
			      mr->sig, GFP_KERNEL));
	if (err)
		goto err_free_descs;
	return 0;

err_free_descs:
	destroy_mkey(dev, mr);
	mlx5_free_priv_descs(mr);
err_free_mtt_mr:
	dereg_mr(to_mdev(mr->mtt_mr->ibmr.device), mr->mtt_mr);
	mr->mtt_mr = NULL;
err_free_klm_mr:
	dereg_mr(to_mdev(mr->klm_mr->ibmr.device), mr->klm_mr);
	mr->klm_mr = NULL;
err_destroy_psv:
	if (mlx5_core_destroy_psv(dev->mdev, mr->sig->psv_memory.psv_idx))
		mlx5_ib_warn(dev, "failed to destroy mem psv %d\n",
			     mr->sig->psv_memory.psv_idx);
	if (mlx5_core_destroy_psv(dev->mdev, mr->sig->psv_wire.psv_idx))
		mlx5_ib_warn(dev, "failed to destroy wire psv %d\n",
			     mr->sig->psv_wire.psv_idx);
err_free_sig:
	kfree(mr->sig);

	return err;
}

static struct ib_mr *__mlx5_ib_alloc_mr(struct ib_pd *pd,
					enum ib_mr_type mr_type, u32 max_num_sg,
					u32 max_num_meta_sg)
{
	struct mlx5_ib_dev *dev = to_mdev(pd->device);
	int inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
	int ndescs = ALIGN(max_num_sg, 4);
	struct mlx5_ib_mr *mr;
	u32 *in;
	int err;

	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
	if (!mr)
		return ERR_PTR(-ENOMEM);

	in = kzalloc(inlen, GFP_KERNEL);
	if (!in) {
		err = -ENOMEM;
		goto err_free;
	}

	mr->ibmr.device = pd->device;
	mr->umem = NULL;

	switch (mr_type) {
	case IB_MR_TYPE_MEM_REG:
		err = mlx5_alloc_mem_reg_descs(pd, mr, ndescs, in, inlen);
		break;
	case IB_MR_TYPE_SG_GAPS:
		err = mlx5_alloc_sg_gaps_descs(pd, mr, ndescs, in, inlen);
		break;
	case IB_MR_TYPE_INTEGRITY:
		err = mlx5_alloc_integrity_descs(pd, mr, max_num_sg,
						 max_num_meta_sg, in, inlen);
		break;
	default:
		mlx5_ib_warn(dev, "Invalid mr type %d\n", mr_type);
		err = -EINVAL;
	}

	if (err)
		goto err_free_in;

	kfree(in);

	return &mr->ibmr;

err_free_in:
	kfree(in);
err_free:
	kfree(mr);
	return ERR_PTR(err);
}

struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type,
			       u32 max_num_sg)
{
	return __mlx5_ib_alloc_mr(pd, mr_type, max_num_sg, 0);
}

struct ib_mr *mlx5_ib_alloc_mr_integrity(struct ib_pd *pd,
					 u32 max_num_sg, u32 max_num_meta_sg)
{
	return __mlx5_ib_alloc_mr(pd, IB_MR_TYPE_INTEGRITY, max_num_sg,
				  max_num_meta_sg);
}
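
/*
 * Illustrative sketch (not part of the driver): kernel ULPs reach the
 * allocators above through the ib_verbs wrappers. The snippet below is a
 * hypothetical example only; "pd", "nents" and "meta_nents" are assumed to
 * come from the caller.
 *
 *	// IB_MR_TYPE_MEM_REG -> MTT descriptors (mlx5_alloc_mem_reg_descs)
 *	// IB_MR_TYPE_SG_GAPS -> KLM descriptors (mlx5_alloc_sg_gaps_descs)
 *	struct ib_mr *mr = ib_alloc_mr(pd, IB_MR_TYPE_MEM_REG, nents);
 *
 *	// IB_MR_TYPE_INTEGRITY additionally creates the PSVs and the
 *	// internal mtt_mr/klm_mr pair (mlx5_alloc_integrity_descs).
 *	struct ib_mr *pi_mr = ib_alloc_mr_integrity(pd, nents, meta_nents);
 *
 *	...
 *	ib_dereg_mr(pi_mr);
 *	ib_dereg_mr(mr);
 */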

int mlx5_ib_alloc_mw(struct ib_mw *ibmw, struct ib_udata *udata)
{
	struct mlx5_ib_dev *dev = to_mdev(ibmw->device);
	int inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
	struct mlx5_ib_mw *mw = to_mmw(ibmw);
	u32 *in = NULL;
	void *mkc;
	int ndescs;
	int err;
	struct mlx5_ib_alloc_mw req = {};
	struct {
		__u32 comp_mask;
		__u32 response_length;
	} resp = {};

	err = ib_copy_from_udata(&req, udata, min(udata->inlen, sizeof(req)));
	if (err)
		return err;

	if (req.comp_mask || req.reserved1 || req.reserved2)
		return -EOPNOTSUPP;

	if (udata->inlen > sizeof(req) &&
	    !ib_is_udata_cleared(udata, sizeof(req),
				 udata->inlen - sizeof(req)))
		return -EOPNOTSUPP;

	ndescs = req.num_klms ? roundup(req.num_klms, 4) : roundup(1, 4);

	in = kzalloc(inlen, GFP_KERNEL);
	if (!in) {
		err = -ENOMEM;
		goto free;
	}

	mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);

	MLX5_SET(mkc, mkc, free, 1);
	MLX5_SET(mkc, mkc, translations_octword_size, ndescs);
	MLX5_SET(mkc, mkc, pd, to_mpd(ibmw->pd)->pdn);
	MLX5_SET(mkc, mkc, umr_en, 1);
	MLX5_SET(mkc, mkc, lr, 1);
	MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_KLMS);
	MLX5_SET(mkc, mkc, en_rinval, !!((ibmw->type == IB_MW_TYPE_2)));
	MLX5_SET(mkc, mkc, qpn, 0xffffff);

	err = mlx5_ib_create_mkey(dev, &mw->mmkey, in, inlen);
	if (err)
		goto free;

	mw->mmkey.type = MLX5_MKEY_MW;
	ibmw->rkey = mw->mmkey.key;
	mw->ndescs = ndescs;

	resp.response_length =
		min(offsetofend(typeof(resp), response_length), udata->outlen);
	if (resp.response_length) {
		err = ib_copy_to_udata(udata, &resp, resp.response_length);
		if (err)
			goto free_mkey;
	}

	if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING)) {
		err = xa_err(xa_store(&dev->odp_mkeys,
				      mlx5_base_mkey(mw->mmkey.key), &mw->mmkey,
				      GFP_KERNEL));
		if (err)
			goto free_mkey;
	}

	kfree(in);
	return 0;

free_mkey:
	mlx5_core_destroy_mkey(dev->mdev, &mw->mmkey);
free:
	kfree(in);
	return err;
}

int mlx5_ib_dealloc_mw(struct ib_mw *mw)
{
	struct mlx5_ib_dev *dev = to_mdev(mw->device);
	struct mlx5_ib_mw *mmw = to_mmw(mw);

	if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING)) {
		xa_erase(&dev->odp_mkeys, mlx5_base_mkey(mmw->mmkey.key));
		/*
		 * pagefault_single_data_segment() may be accessing mmw under
		 * SRCU if the user bound an ODP MR to this MW.
		 */
		synchronize_srcu(&dev->odp_srcu);
	}

	return mlx5_core_destroy_mkey(dev->mdev, &mmw->mmkey);
}

int mlx5_ib_check_mr_status(struct ib_mr *ibmr, u32 check_mask,
			    struct ib_mr_status *mr_status)
{
	struct mlx5_ib_mr *mmr = to_mmr(ibmr);
	int ret = 0;

	if (check_mask & ~IB_MR_CHECK_SIG_STATUS) {
		pr_err("Invalid status check mask\n");
		ret = -EINVAL;
		goto done;
	}

	mr_status->fail_status = 0;
	if (check_mask & IB_MR_CHECK_SIG_STATUS) {
		if (!mmr->sig) {
			ret = -EINVAL;
			pr_err("signature status check requested on a non-signature enabled MR\n");
			goto done;
		}

		mmr->sig->sig_status_checked = true;
		if (!mmr->sig->sig_err_exists)
			goto done;

		if (ibmr->lkey == mmr->sig->err_item.key)
			memcpy(&mr_status->sig_err, &mmr->sig->err_item,
			       sizeof(mr_status->sig_err));
		else {
			mr_status->sig_err.err_type = IB_SIG_BAD_GUARD;
			mr_status->sig_err.sig_err_offset = 0;
			mr_status->sig_err.key = mmr->sig->err_item.key;
		}

		mmr->sig->sig_err_exists = false;
		mr_status->fail_status |= IB_MR_CHECK_SIG_STATUS;
	}

done:
	return ret;
}
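
/*
 * Illustrative sketch (not part of the driver): a ULP using T10-PI offload
 * typically polls the signature status after I/O completion through
 * ib_check_mr_status(). Except for the verbs API itself, the names below
 * are hypothetical.
 *
 *	struct ib_mr_status status;
 *
 *	if (!ib_check_mr_status(mr, IB_MR_CHECK_SIG_STATUS, &status) &&
 *	    (status.fail_status & IB_MR_CHECK_SIG_STATUS))
 *		pr_warn("PI error type %d at offset %llu, key 0x%x\n",
 *			status.sig_err.err_type,
 *			status.sig_err.sig_err_offset,
 *			status.sig_err.key);
 */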

static int
mlx5_ib_map_pa_mr_sg_pi(struct ib_mr *ibmr, struct scatterlist *data_sg,
			int data_sg_nents, unsigned int *data_sg_offset,
			struct scatterlist *meta_sg, int meta_sg_nents,
			unsigned int *meta_sg_offset)
{
	struct mlx5_ib_mr *mr = to_mmr(ibmr);
	unsigned int sg_offset = 0;
	int n = 0;

	mr->meta_length = 0;
	if (data_sg_nents == 1) {
		n++;
		mr->ndescs = 1;
		if (data_sg_offset)
			sg_offset = *data_sg_offset;
		mr->data_length = sg_dma_len(data_sg) - sg_offset;
		mr->data_iova = sg_dma_address(data_sg) + sg_offset;
		if (meta_sg_nents == 1) {
			n++;
			mr->meta_ndescs = 1;
			if (meta_sg_offset)
				sg_offset = *meta_sg_offset;
			else
				sg_offset = 0;
			mr->meta_length = sg_dma_len(meta_sg) - sg_offset;
			mr->pi_iova = sg_dma_address(meta_sg) + sg_offset;
		}
		ibmr->length = mr->data_length + mr->meta_length;
	}

	return n;
}

static int
mlx5_ib_sg_to_klms(struct mlx5_ib_mr *mr,
		   struct scatterlist *sgl,
		   unsigned short sg_nents,
		   unsigned int *sg_offset_p,
		   struct scatterlist *meta_sgl,
		   unsigned short meta_sg_nents,
		   unsigned int *meta_sg_offset_p)
{
	struct scatterlist *sg = sgl;
	struct mlx5_klm *klms = mr->descs;
	unsigned int sg_offset = sg_offset_p ? *sg_offset_p : 0;
	u32 lkey = mr->ibmr.pd->local_dma_lkey;
	int i, j = 0;

	mr->ibmr.iova = sg_dma_address(sg) + sg_offset;
	mr->ibmr.length = 0;

	for_each_sg(sgl, sg, sg_nents, i) {
		if (unlikely(i >= mr->max_descs))
			break;
		klms[i].va = cpu_to_be64(sg_dma_address(sg) + sg_offset);
		klms[i].bcount = cpu_to_be32(sg_dma_len(sg) - sg_offset);
		klms[i].key = cpu_to_be32(lkey);
		mr->ibmr.length += sg_dma_len(sg) - sg_offset;

		sg_offset = 0;
	}

	if (sg_offset_p)
		*sg_offset_p = sg_offset;

	mr->ndescs = i;
	mr->data_length = mr->ibmr.length;

	if (meta_sg_nents) {
		sg = meta_sgl;
		sg_offset = meta_sg_offset_p ? *meta_sg_offset_p : 0;
		for_each_sg(meta_sgl, sg, meta_sg_nents, j) {
			if (unlikely(i + j >= mr->max_descs))
				break;
			klms[i + j].va = cpu_to_be64(sg_dma_address(sg) +
						     sg_offset);
			klms[i + j].bcount = cpu_to_be32(sg_dma_len(sg) -
							 sg_offset);
			klms[i + j].key = cpu_to_be32(lkey);
			mr->ibmr.length += sg_dma_len(sg) - sg_offset;

			sg_offset = 0;
		}
		if (meta_sg_offset_p)
			*meta_sg_offset_p = sg_offset;

		mr->meta_ndescs = j;
		mr->meta_length = mr->ibmr.length - mr->data_length;
	}

	return i + j;
}
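
/*
 * Note on the layout built by mlx5_ib_sg_to_klms() above: each struct
 * mlx5_klm entry is an independent (virtual address, byte count, lkey)
 * triple, so the mapped region may contain arbitrary gaps between
 * scatterlist elements. This is what backs IB_MR_TYPE_SG_GAPS and the
 * KLM-based integrity MR, at the cost of indirect (and therefore slower)
 * HW access compared to MTT descriptors.
 */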

static int mlx5_set_page(struct ib_mr *ibmr, u64 addr)
{
	struct mlx5_ib_mr *mr = to_mmr(ibmr);
	__be64 *descs;

	if (unlikely(mr->ndescs == mr->max_descs))
		return -ENOMEM;

	descs = mr->descs;
	descs[mr->ndescs++] = cpu_to_be64(addr | MLX5_EN_RD | MLX5_EN_WR);

	return 0;
}

static int mlx5_set_page_pi(struct ib_mr *ibmr, u64 addr)
{
	struct mlx5_ib_mr *mr = to_mmr(ibmr);
	__be64 *descs;

	if (unlikely(mr->ndescs + mr->meta_ndescs == mr->max_descs))
		return -ENOMEM;

	descs = mr->descs;
	descs[mr->ndescs + mr->meta_ndescs++] =
		cpu_to_be64(addr | MLX5_EN_RD | MLX5_EN_WR);

	return 0;
}

static int
mlx5_ib_map_mtt_mr_sg_pi(struct ib_mr *ibmr, struct scatterlist *data_sg,
			 int data_sg_nents, unsigned int *data_sg_offset,
			 struct scatterlist *meta_sg, int meta_sg_nents,
			 unsigned int *meta_sg_offset)
{
	struct mlx5_ib_mr *mr = to_mmr(ibmr);
	struct mlx5_ib_mr *pi_mr = mr->mtt_mr;
	int n;

	pi_mr->ndescs = 0;
	pi_mr->meta_ndescs = 0;
	pi_mr->meta_length = 0;

	ib_dma_sync_single_for_cpu(ibmr->device, pi_mr->desc_map,
				   pi_mr->desc_size * pi_mr->max_descs,
				   DMA_TO_DEVICE);

	pi_mr->ibmr.page_size = ibmr->page_size;
	n = ib_sg_to_pages(&pi_mr->ibmr, data_sg, data_sg_nents, data_sg_offset,
			   mlx5_set_page);
	if (n != data_sg_nents)
		return n;

	pi_mr->data_iova = pi_mr->ibmr.iova;
	pi_mr->data_length = pi_mr->ibmr.length;
	pi_mr->ibmr.length = pi_mr->data_length;
	ibmr->length = pi_mr->data_length;

	if (meta_sg_nents) {
		u64 page_mask = ~((u64)ibmr->page_size - 1);
		u64 iova = pi_mr->data_iova;

		n += ib_sg_to_pages(&pi_mr->ibmr, meta_sg, meta_sg_nents,
				    meta_sg_offset, mlx5_set_page_pi);

		pi_mr->meta_length = pi_mr->ibmr.length;
		/*
		 * The PI address for the HW is the offset of the metadata
		 * address relative to the first data page address.
		 * It equals the first data page address + the size of the
		 * data pages + the metadata offset within the first metadata
		 * page.
		 */
		pi_mr->pi_iova = (iova & page_mask) +
				 pi_mr->ndescs * ibmr->page_size +
				 (pi_mr->ibmr.iova & ~page_mask);
		/*
		 * In order to use one MTT MR for data and metadata, we also
		 * register the gaps between the end of the data and the start
		 * of the metadata (the sig MR verifies that the HW accesses
		 * the right addresses). This mapping is safe because we use
		 * an internal mkey for the registration.
		 */
		pi_mr->ibmr.length = pi_mr->pi_iova + pi_mr->meta_length - iova;
		pi_mr->ibmr.iova = iova;
		ibmr->length += pi_mr->meta_length;
	}

	ib_dma_sync_single_for_device(ibmr->device, pi_mr->desc_map,
				      pi_mr->desc_size * pi_mr->max_descs,
				      DMA_TO_DEVICE);

	return n;
}

static int
mlx5_ib_map_klm_mr_sg_pi(struct ib_mr *ibmr, struct scatterlist *data_sg,
			 int data_sg_nents, unsigned int *data_sg_offset,
			 struct scatterlist *meta_sg, int meta_sg_nents,
			 unsigned int *meta_sg_offset)
{
	struct mlx5_ib_mr *mr = to_mmr(ibmr);
	struct mlx5_ib_mr *pi_mr = mr->klm_mr;
	int n;

	pi_mr->ndescs = 0;
	pi_mr->meta_ndescs = 0;
	pi_mr->meta_length = 0;

	ib_dma_sync_single_for_cpu(ibmr->device, pi_mr->desc_map,
				   pi_mr->desc_size * pi_mr->max_descs,
				   DMA_TO_DEVICE);

	n = mlx5_ib_sg_to_klms(pi_mr, data_sg, data_sg_nents, data_sg_offset,
			       meta_sg, meta_sg_nents, meta_sg_offset);

	ib_dma_sync_single_for_device(ibmr->device, pi_mr->desc_map,
				      pi_mr->desc_size * pi_mr->max_descs,
				      DMA_TO_DEVICE);

	/* This is a zero-based memory region */
	pi_mr->data_iova = 0;
	pi_mr->ibmr.iova = 0;
	pi_mr->pi_iova = pi_mr->data_length;
	ibmr->length = pi_mr->ibmr.length;

	return n;
}

int mlx5_ib_map_mr_sg_pi(struct ib_mr *ibmr, struct scatterlist *data_sg,
			 int data_sg_nents, unsigned int *data_sg_offset,
			 struct scatterlist *meta_sg, int meta_sg_nents,
			 unsigned int *meta_sg_offset)
{
	struct mlx5_ib_mr *mr = to_mmr(ibmr);
	struct mlx5_ib_mr *pi_mr = NULL;
	int n;

	WARN_ON(ibmr->type != IB_MR_TYPE_INTEGRITY);

	mr->ndescs = 0;
	mr->data_length = 0;
	mr->data_iova = 0;
	mr->meta_ndescs = 0;
	mr->pi_iova = 0;
	/*
	 * As a performance optimization, if possible, there is no need to
	 * perform a UMR operation to register the data/metadata buffers.
	 * First try to map the sg lists to PA descriptors with local_dma_lkey.
	 * Fall back to UMR only in case of a failure.
	 */
	n = mlx5_ib_map_pa_mr_sg_pi(ibmr, data_sg, data_sg_nents,
				    data_sg_offset, meta_sg, meta_sg_nents,
				    meta_sg_offset);
	if (n == data_sg_nents + meta_sg_nents)
		goto out;
	/*
	 * As a performance optimization, if possible, there is no need to map
	 * the sg lists to KLM descriptors. First try to map the sg lists to
	 * MTT descriptors and fall back to KLM only in case of a failure.
	 * It's more efficient for the HW to work with MTT descriptors
	 * (especially under high load).
	 * Use KLM (indirect access) only if it's mandatory.
	 */
	pi_mr = mr->mtt_mr;
	n = mlx5_ib_map_mtt_mr_sg_pi(ibmr, data_sg, data_sg_nents,
				     data_sg_offset, meta_sg, meta_sg_nents,
				     meta_sg_offset);
	if (n == data_sg_nents + meta_sg_nents)
		goto out;

	pi_mr = mr->klm_mr;
	n = mlx5_ib_map_klm_mr_sg_pi(ibmr, data_sg, data_sg_nents,
				     data_sg_offset, meta_sg, meta_sg_nents,
				     meta_sg_offset);
	if (unlikely(n != data_sg_nents + meta_sg_nents))
		return -ENOMEM;

out:
	/* This is a zero-based memory region */
	ibmr->iova = 0;
	mr->pi_mr = pi_mr;
	if (pi_mr)
		ibmr->sig_attrs->meta_length = pi_mr->meta_length;
	else
		ibmr->sig_attrs->meta_length = mr->meta_length;

	return 0;
}

int mlx5_ib_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents,
		      unsigned int *sg_offset)
{
	struct mlx5_ib_mr *mr = to_mmr(ibmr);
	int n;

	mr->ndescs = 0;

	ib_dma_sync_single_for_cpu(ibmr->device, mr->desc_map,
				   mr->desc_size * mr->max_descs,
				   DMA_TO_DEVICE);

	if (mr->access_mode == MLX5_MKC_ACCESS_MODE_KLMS)
		n = mlx5_ib_sg_to_klms(mr, sg, sg_nents, sg_offset, NULL, 0,
				       NULL);
	else
		n = ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset,
				   mlx5_set_page);

	ib_dma_sync_single_for_device(ibmr->device, mr->desc_map,
				      mr->desc_size * mr->max_descs,
				      DMA_TO_DEVICE);

	return n;
}