drm/xe: unlock on error path in xe_vm_add_compute_exec_queue()
drivers/gpu/drm/xe/xe_vm.c
// SPDX-License-Identifier: MIT
/*
 * Copyright © 2021 Intel Corporation
 */

#include "xe_vm.h"

#include <linux/dma-fence-array.h>
#include <linux/nospec.h>

#include <drm/drm_exec.h>
#include <drm/drm_print.h>
#include <drm/ttm/ttm_execbuf_util.h>
#include <drm/ttm/ttm_tt.h>
#include <drm/xe_drm.h>
#include <linux/delay.h>
#include <linux/kthread.h>
#include <linux/mm.h>
#include <linux/swap.h>

#include "xe_assert.h"
#include "xe_bo.h"
#include "xe_device.h"
#include "xe_drm_client.h"
#include "xe_exec_queue.h"
#include "xe_gt.h"
#include "xe_gt_pagefault.h"
#include "xe_gt_tlb_invalidation.h"
#include "xe_migrate.h"
#include "xe_pat.h"
#include "xe_pm.h"
#include "xe_preempt_fence.h"
#include "xe_pt.h"
#include "xe_res_cursor.h"
#include "xe_sync.h"
#include "xe_trace.h"
#include "generated/xe_wa_oob.h"
#include "xe_wa.h"

#define TEST_VM_ASYNC_OPS_ERROR

static struct drm_gem_object *xe_vm_obj(struct xe_vm *vm)
{
	return vm->gpuvm.r_obj;
}

/**
 * xe_vma_userptr_check_repin() - Advisory check for repin needed
 * @vma: The userptr vma
 *
 * Check if the userptr vma has been invalidated since last successful
 * repin. The check is advisory only and the function can be called
 * without the vm->userptr.notifier_lock held. There is no guarantee that the
 * vma userptr will remain valid after a lockless check, so typically
 * the call needs to be followed by a proper check under the notifier_lock.
 *
 * Return: 0 if userptr vma is valid, -EAGAIN otherwise; repin recommended.
 */
int xe_vma_userptr_check_repin(struct xe_vma *vma)
{
	return mmu_interval_check_retry(&vma->userptr.notifier,
					vma->userptr.notifier_seq) ?
		-EAGAIN : 0;
}

66int xe_vma_userptr_pin_pages(struct xe_vma *vma)
67{
21ed3327 68 struct xe_vm *vm = xe_vma_vm(vma);
dd08ebf6 69 struct xe_device *xe = vm->xe;
21ed3327 70 const unsigned long num_pages = xe_vma_size(vma) >> PAGE_SHIFT;
71 struct page **pages;
72 bool in_kthread = !current->mm;
73 unsigned long notifier_seq;
74 int pinned, ret, i;
21ed3327 75 bool read_only = xe_vma_read_only(vma);
76
77 lockdep_assert_held(&vm->lock);
c73acc1e 78 xe_assert(xe, xe_vma_is_userptr(vma));
dd08ebf6 79retry:
b06d47be 80 if (vma->gpuva.flags & XE_VMA_DESTROYED)
81 return 0;
82
83 notifier_seq = mmu_interval_read_begin(&vma->userptr.notifier);
84 if (notifier_seq == vma->userptr.notifier_seq)
85 return 0;
86
87 pages = kvmalloc_array(num_pages, sizeof(*pages), GFP_KERNEL);
88 if (!pages)
89 return -ENOMEM;
90
91 if (vma->userptr.sg) {
92 dma_unmap_sgtable(xe->drm.dev,
93 vma->userptr.sg,
94 read_only ? DMA_TO_DEVICE :
95 DMA_BIDIRECTIONAL, 0);
96 sg_free_table(vma->userptr.sg);
97 vma->userptr.sg = NULL;
98 }
99
100 pinned = ret = 0;
101 if (in_kthread) {
102 if (!mmget_not_zero(vma->userptr.notifier.mm)) {
103 ret = -EFAULT;
104 goto mm_closed;
105 }
106 kthread_use_mm(vma->userptr.notifier.mm);
107 }
108
109 while (pinned < num_pages) {
110 ret = get_user_pages_fast(xe_vma_userptr(vma) +
111 pinned * PAGE_SIZE,
112 num_pages - pinned,
113 read_only ? 0 : FOLL_WRITE,
114 &pages[pinned]);
115 if (ret < 0) {
116 if (in_kthread)
117 ret = 0;
118 break;
119 }
120
121 pinned += ret;
122 ret = 0;
123 }
124
125 if (in_kthread) {
126 kthread_unuse_mm(vma->userptr.notifier.mm);
127 mmput(vma->userptr.notifier.mm);
128 }
129mm_closed:
130 if (ret)
131 goto out;
132
133 ret = sg_alloc_table_from_pages_segment(&vma->userptr.sgt, pages,
134 pinned, 0,
135 (u64)pinned << PAGE_SHIFT,
136 xe_sg_segment_size(xe->drm.dev),
137 GFP_KERNEL);
138 if (ret) {
139 vma->userptr.sg = NULL;
140 goto out;
141 }
142 vma->userptr.sg = &vma->userptr.sgt;
143
144 ret = dma_map_sgtable(xe->drm.dev, vma->userptr.sg,
145 read_only ? DMA_TO_DEVICE :
146 DMA_BIDIRECTIONAL,
147 DMA_ATTR_SKIP_CPU_SYNC |
148 DMA_ATTR_NO_KERNEL_MAPPING);
149 if (ret) {
150 sg_free_table(vma->userptr.sg);
151 vma->userptr.sg = NULL;
152 goto out;
153 }
154
155 for (i = 0; i < pinned; ++i) {
156 if (!read_only) {
157 lock_page(pages[i]);
158 set_page_dirty(pages[i]);
159 unlock_page(pages[i]);
160 }
161
162 mark_page_accessed(pages[i]);
163 }
164
165out:
166 release_pages(pages, pinned);
167 kvfree(pages);
168
169 if (!(ret < 0)) {
170 vma->userptr.notifier_seq = notifier_seq;
171 if (xe_vma_userptr_check_repin(vma) == -EAGAIN)
172 goto retry;
173 }
174
175 return ret < 0 ? ret : 0;
176}
177
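/*
 * Returns true if any exec queue on the VM either has no preempt fence
 * installed yet or has a preempt fence whose signaling has already been
 * enabled, i.e. there is preemption activity the caller must sync against.
 */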
static bool preempt_fences_waiting(struct xe_vm *vm)
{
	struct xe_exec_queue *q;

	lockdep_assert_held(&vm->lock);
	xe_vm_assert_held(vm);

	list_for_each_entry(q, &vm->preempt.exec_queues, compute.link) {
		if (!q->compute.pfence ||
		    (q->compute.pfence && test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT,
						   &q->compute.pfence->flags))) {
			return true;
		}
	}

	return false;
}

static void free_preempt_fences(struct list_head *list)
{
	struct list_head *link, *next;

	list_for_each_safe(link, next, list)
		xe_preempt_fence_free(to_preempt_fence_from_link(link));
}

static int alloc_preempt_fences(struct xe_vm *vm, struct list_head *list,
				unsigned int *count)
{
	lockdep_assert_held(&vm->lock);
	xe_vm_assert_held(vm);

	if (*count >= vm->preempt.num_exec_queues)
		return 0;

	for (; *count < vm->preempt.num_exec_queues; ++(*count)) {
		struct xe_preempt_fence *pfence = xe_preempt_fence_alloc();

		if (IS_ERR(pfence))
			return PTR_ERR(pfence);

		list_move_tail(xe_preempt_fence_link(pfence), list);
	}

	return 0;
}

static int wait_for_existing_preempt_fences(struct xe_vm *vm)
{
	struct xe_exec_queue *q;

	xe_vm_assert_held(vm);

	list_for_each_entry(q, &vm->preempt.exec_queues, compute.link) {
		if (q->compute.pfence) {
			long timeout = dma_fence_wait(q->compute.pfence, false);

			if (timeout < 0)
				return -ETIME;
			dma_fence_put(q->compute.pfence);
			q->compute.pfence = NULL;
		}
	}

	return 0;
}

static bool xe_vm_is_idle(struct xe_vm *vm)
{
	struct xe_exec_queue *q;

	xe_vm_assert_held(vm);
	list_for_each_entry(q, &vm->preempt.exec_queues, compute.link) {
		if (!xe_exec_queue_is_idle(q))
			return false;
	}

	return true;
}

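/*
 * Attach one pre-allocated preempt fence from @list to each exec queue on
 * the VM, dropping the reference to the fence it replaces. The caller must
 * have allocated at least vm->preempt.num_exec_queues fences.
 */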
static void arm_preempt_fences(struct xe_vm *vm, struct list_head *list)
{
	struct list_head *link;
	struct xe_exec_queue *q;

	list_for_each_entry(q, &vm->preempt.exec_queues, compute.link) {
		struct dma_fence *fence;

		link = list->next;
		xe_assert(vm->xe, link != list);

		fence = xe_preempt_fence_arm(to_preempt_fence_from_link(link),
					     q, q->compute.context,
					     ++q->compute.seqno);
		dma_fence_put(q->compute.pfence);
		q->compute.pfence = fence;
	}
}

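/*
 * Install the current preempt fence of every exec queue on the VM into the
 * BO's dma-resv as a BOOKKEEP fence, after reserving enough fence slots.
 */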
static int add_preempt_fences(struct xe_vm *vm, struct xe_bo *bo)
{
	struct xe_exec_queue *q;
	int err;

	if (!vm->preempt.num_exec_queues)
		return 0;

	err = xe_bo_lock(bo, true);
	if (err)
		return err;

	err = dma_resv_reserve_fences(bo->ttm.base.resv, vm->preempt.num_exec_queues);
	if (err)
		goto out_unlock;

	list_for_each_entry(q, &vm->preempt.exec_queues, compute.link)
		if (q->compute.pfence) {
			dma_resv_add_fence(bo->ttm.base.resv,
					   q->compute.pfence,
					   DMA_RESV_USAGE_BOOKKEEP);
		}

out_unlock:
	xe_bo_unlock(bo);
	return err;
}

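/*
 * Resume all exec queues on the VM and re-add their (re-armed) preempt
 * fences to the VM's shared dma-resv and to all external BOs as BOOKKEEP
 * fences.
 */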
static void resume_and_reinstall_preempt_fences(struct xe_vm *vm,
						struct drm_exec *exec)
{
	struct xe_exec_queue *q;

	lockdep_assert_held(&vm->lock);
	xe_vm_assert_held(vm);

	list_for_each_entry(q, &vm->preempt.exec_queues, compute.link) {
		q->ops->resume(q);

		drm_gpuvm_resv_add_fence(&vm->gpuvm, exec, q->compute.pfence,
					 DMA_RESV_USAGE_BOOKKEEP, DMA_RESV_USAGE_BOOKKEEP);
	}
}

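/**
 * xe_vm_add_compute_exec_queue() - Add a compute exec queue to the VM
 * @vm: The VM, which must be in preempt-fence mode.
 * @q: The exec queue to add.
 *
 * Creates a preempt fence for @q, adds the queue to the VM's list of
 * compute exec queues and installs the fence in the VM's reservation
 * object. If a userptr invalidation or preemption is already in flight,
 * signaling of the new fence is enabled immediately so it syncs with the
 * other preempt fences on the VM.
 *
 * Return: 0 on success, negative error code on error.
 */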
int xe_vm_add_compute_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q)
{
	struct drm_gpuvm_exec vm_exec = {
		.vm = &vm->gpuvm,
		.flags = DRM_EXEC_INTERRUPTIBLE_WAIT,
		.num_fences = 1,
	};
	struct drm_exec *exec = &vm_exec.exec;
	struct dma_fence *pfence;
	int err;
	bool wait;

	xe_assert(vm->xe, xe_vm_in_preempt_fence_mode(vm));

	down_write(&vm->lock);
	err = drm_gpuvm_exec_lock(&vm_exec);
	if (err)
		goto out_up_write;

	pfence = xe_preempt_fence_create(q, q->compute.context,
					 ++q->compute.seqno);
	if (!pfence) {
		err = -ENOMEM;
		goto out_fini;
	}

	list_add(&q->compute.link, &vm->preempt.exec_queues);
	++vm->preempt.num_exec_queues;
	q->compute.pfence = pfence;

	down_read(&vm->userptr.notifier_lock);

	drm_gpuvm_resv_add_fence(&vm->gpuvm, exec, pfence,
				 DMA_RESV_USAGE_BOOKKEEP, DMA_RESV_USAGE_BOOKKEEP);

	/*
	 * Check whether a preemption on the VM or a userptr invalidation is
	 * in flight; if so, trigger this preempt fence to sync state with
	 * the other preempt fences on the VM.
	 */
	wait = __xe_vm_userptr_needs_repin(vm) || preempt_fences_waiting(vm);
	if (wait)
		dma_fence_enable_sw_signaling(pfence);

	up_read(&vm->userptr.notifier_lock);

out_fini:
	drm_exec_fini(exec);
out_up_write:
	up_write(&vm->lock);

	return err;
}

/**
 * xe_vm_remove_compute_exec_queue() - Remove compute exec queue from VM
 * @vm: The VM.
 * @q: The exec_queue
 */
void xe_vm_remove_compute_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q)
{
	if (!xe_vm_in_preempt_fence_mode(vm))
		return;

	down_write(&vm->lock);
	list_del(&q->compute.link);
	--vm->preempt.num_exec_queues;
	if (q->compute.pfence) {
		dma_fence_enable_sw_signaling(q->compute.pfence);
		dma_fence_put(q->compute.pfence);
		q->compute.pfence = NULL;
	}
	up_write(&vm->lock);
}

/**
 * __xe_vm_userptr_needs_repin() - Check whether the VM does have userptrs
 * that need repinning.
 * @vm: The VM.
 *
 * This function checks for whether the VM has userptrs that need repinning,
 * and provides a release-type barrier on the userptr.notifier_lock after
 * checking.
 *
 * Return: 0 if there are no userptrs needing repinning, -EAGAIN if there are.
 */
int __xe_vm_userptr_needs_repin(struct xe_vm *vm)
{
	lockdep_assert_held_read(&vm->userptr.notifier_lock);

	return (list_empty(&vm->userptr.repin_list) &&
		list_empty(&vm->userptr.invalidated)) ? 0 : -EAGAIN;
}

#define XE_VM_REBIND_RETRY_TIMEOUT_MS 1000

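/*
 * Ban the VM: set XE_VM_FLAG_BANNED and kill every exec queue on the VM's
 * preempt list, e.g. when the rebind worker hits an unrecoverable error.
 */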
static void xe_vm_kill(struct xe_vm *vm)
{
	struct xe_exec_queue *q;

	lockdep_assert_held(&vm->lock);

	xe_vm_lock(vm, false);
	vm->flags |= XE_VM_FLAG_BANNED;
	trace_xe_vm_kill(vm);

	list_for_each_entry(q, &vm->preempt.exec_queues, compute.link)
		q->ops->kill(q);
	xe_vm_unlock(vm);

	/* TODO: Inform user the VM is banned */
}

/**
 * xe_vm_validate_should_retry() - Whether to retry after a validate error.
 * @exec: The drm_exec object used for locking before validation.
 * @err: The error returned from ttm_bo_validate().
 * @end: A ktime_t cookie that should be set to 0 before first use and
 * that should be reused on subsequent calls.
 *
 * With multiple active VMs, under memory pressure, it is possible that
 * ttm_bo_validate() runs into -EDEADLK, and in such a case it returns
 * -ENOMEM. Until ttm properly handles locking in such scenarios, the best
 * thing the driver can do is retry with a timeout. Check if that is
 * necessary, and if so unlock the drm_exec's objects while keeping the
 * ticket to prepare for a rerun.
 *
 * Return: true if a retry after drm_exec_init() is recommended;
 * false otherwise.
 */
bool xe_vm_validate_should_retry(struct drm_exec *exec, int err, ktime_t *end)
{
	ktime_t cur;

	if (err != -ENOMEM)
		return false;

	cur = ktime_get();
	*end = *end ? : ktime_add_ms(cur, XE_VM_REBIND_RETRY_TIMEOUT_MS);
	if (!ktime_before(cur, *end))
		return false;

	msleep(20);
	return true;
}

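/*
 * Typical usage pattern (mirroring the rebind worker below): after a failed
 * drm_exec transaction, convert the error into a bounded retry.
 *
 *	drm_exec_fini(&exec);
 *	if (err && xe_vm_validate_should_retry(&exec, err, &end))
 *		err = -EAGAIN;	(caller loops back to drm_exec_init())
 */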
static int xe_gpuvm_validate(struct drm_gpuvm_bo *vm_bo, struct drm_exec *exec)
{
	struct xe_vm *vm = gpuvm_to_vm(vm_bo->vm);
	struct drm_gpuva *gpuva;
	int ret;

	lockdep_assert_held(&vm->lock);
	drm_gpuvm_bo_for_each_va(gpuva, vm_bo)
		list_move_tail(&gpuva_to_vma(gpuva)->combined_links.rebind,
			       &vm->rebind_list);

	ret = xe_bo_validate(gem_to_xe_bo(vm_bo->obj), vm, false);
	if (ret)
		return ret;

	vm_bo->evicted = false;
	return 0;
}

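/*
 * Lock and prepare the VM for the rebind worker: reserve fence slots for the
 * preempt fences and possible rebinds, skip out early (*done = true) if the
 * VM is idle or no preempt fences are waiting, otherwise wait for the
 * existing preempt fences and validate all evicted BOs.
 */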
static int xe_preempt_work_begin(struct drm_exec *exec, struct xe_vm *vm,
				 bool *done)
{
	int err;

	/*
	 * 1 fence for each preempt fence plus a fence for each tile from a
	 * possible rebind
	 */
	err = drm_gpuvm_prepare_vm(&vm->gpuvm, exec, vm->preempt.num_exec_queues +
				   vm->xe->info.tile_count);
	if (err)
		return err;

	if (xe_vm_is_idle(vm)) {
		vm->preempt.rebind_deactivated = true;
		*done = true;
		return 0;
	}

	if (!preempt_fences_waiting(vm)) {
		*done = true;
		return 0;
	}

	err = drm_gpuvm_prepare_objects(&vm->gpuvm, exec, vm->preempt.num_exec_queues);
	if (err)
		return err;

	err = wait_for_existing_preempt_fences(vm);
	if (err)
		return err;

	return drm_gpuvm_validate(&vm->gpuvm, exec);
}

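/*
 * Worker that restores a preempt-fence mode VM after preemption or userptr
 * invalidation: repins userptrs, revalidates and rebinds evicted BOs, then
 * arms fresh preempt fences and resumes the exec queues. On unrecoverable
 * errors the VM is killed.
 */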
static void preempt_rebind_work_func(struct work_struct *w)
{
	struct xe_vm *vm = container_of(w, struct xe_vm, preempt.rebind_work);
	struct drm_exec exec;
	struct dma_fence *rebind_fence;
	unsigned int fence_count = 0;
	LIST_HEAD(preempt_fences);
	ktime_t end = 0;
	int err = 0;
	long wait;
	int __maybe_unused tries = 0;

	xe_assert(vm->xe, xe_vm_in_preempt_fence_mode(vm));
	trace_xe_vm_rebind_worker_enter(vm);

	down_write(&vm->lock);

	if (xe_vm_is_closed_or_banned(vm)) {
		up_write(&vm->lock);
		trace_xe_vm_rebind_worker_exit(vm);
		return;
	}

retry:
	if (xe_vm_userptr_check_repin(vm)) {
		err = xe_vm_userptr_pin(vm);
		if (err)
			goto out_unlock_outer;
	}

	drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT);

	drm_exec_until_all_locked(&exec) {
		bool done = false;

		err = xe_preempt_work_begin(&exec, vm, &done);
		drm_exec_retry_on_contention(&exec);
		if (err || done) {
			drm_exec_fini(&exec);
			if (err && xe_vm_validate_should_retry(&exec, err, &end))
				err = -EAGAIN;

			goto out_unlock_outer;
		}
	}

	err = alloc_preempt_fences(vm, &preempt_fences, &fence_count);
	if (err)
		goto out_unlock;

	rebind_fence = xe_vm_rebind(vm, true);
	if (IS_ERR(rebind_fence)) {
		err = PTR_ERR(rebind_fence);
		goto out_unlock;
	}

	if (rebind_fence) {
		dma_fence_wait(rebind_fence, false);
		dma_fence_put(rebind_fence);
	}

	/* Wait on munmap style VM unbinds */
	wait = dma_resv_wait_timeout(xe_vm_resv(vm),
				     DMA_RESV_USAGE_KERNEL,
				     false, MAX_SCHEDULE_TIMEOUT);
	if (wait <= 0) {
		err = -ETIME;
		goto out_unlock;
	}

#define retry_required(__tries, __vm) \
	(IS_ENABLED(CONFIG_DRM_XE_USERPTR_INVAL_INJECT) ? \
	(!(__tries)++ || __xe_vm_userptr_needs_repin(__vm)) : \
	__xe_vm_userptr_needs_repin(__vm))

	down_read(&vm->userptr.notifier_lock);
	if (retry_required(tries, vm)) {
		up_read(&vm->userptr.notifier_lock);
		err = -EAGAIN;
		goto out_unlock;
	}

#undef retry_required

	spin_lock(&vm->xe->ttm.lru_lock);
	ttm_lru_bulk_move_tail(&vm->lru_bulk_move);
	spin_unlock(&vm->xe->ttm.lru_lock);

	/* Point of no return. */
	arm_preempt_fences(vm, &preempt_fences);
	resume_and_reinstall_preempt_fences(vm, &exec);
	up_read(&vm->userptr.notifier_lock);

out_unlock:
	drm_exec_fini(&exec);
out_unlock_outer:
	if (err == -EAGAIN) {
		trace_xe_vm_rebind_worker_retry(vm);
		goto retry;
	}

	if (err) {
		drm_warn(&vm->xe->drm, "VM worker error: %d\n", err);
		xe_vm_kill(vm);
	}
	up_write(&vm->lock);

	free_preempt_fences(&preempt_fences);

	trace_xe_vm_rebind_worker_exit(vm);
}
633
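/*
 * MMU interval notifier callback for userptr VMAs: bumps the notifier
 * sequence, queues the VMA for repin by exec / the rebind worker and waits
 * for GPU access to stop (preempt fences are kicked, bookkeep fences
 * awaited). In fault mode the VMA's GPU mappings are invalidated directly.
 */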
static bool vma_userptr_invalidate(struct mmu_interval_notifier *mni,
				   const struct mmu_notifier_range *range,
				   unsigned long cur_seq)
{
	struct xe_vma *vma = container_of(mni, struct xe_vma, userptr.notifier);
	struct xe_vm *vm = xe_vma_vm(vma);
	struct dma_resv_iter cursor;
	struct dma_fence *fence;
	long err;

	xe_assert(vm->xe, xe_vma_is_userptr(vma));
	trace_xe_vma_userptr_invalidate(vma);

	if (!mmu_notifier_range_blockable(range))
		return false;

	down_write(&vm->userptr.notifier_lock);
	mmu_interval_set_seq(mni, cur_seq);

	/* No need to stop gpu access if the userptr is not yet bound. */
	if (!vma->userptr.initial_bind) {
		up_write(&vm->userptr.notifier_lock);
		return true;
	}

	/*
	 * Tell exec and rebind worker they need to repin and rebind this
	 * userptr.
	 */
	if (!xe_vm_in_fault_mode(vm) &&
	    !(vma->gpuva.flags & XE_VMA_DESTROYED) && vma->tile_present) {
		spin_lock(&vm->userptr.invalidated_lock);
		list_move_tail(&vma->userptr.invalidate_link,
			       &vm->userptr.invalidated);
		spin_unlock(&vm->userptr.invalidated_lock);
	}

	up_write(&vm->userptr.notifier_lock);

	/*
	 * Preempt fences turn into schedule disables, pipeline these.
	 * Note that even in fault mode, we need to wait for binds and
	 * unbinds to complete, and those are attached as BOOKKEEP fences
	 * to the vm.
	 */
	dma_resv_iter_begin(&cursor, xe_vm_resv(vm),
			    DMA_RESV_USAGE_BOOKKEEP);
	dma_resv_for_each_fence_unlocked(&cursor, fence)
		dma_fence_enable_sw_signaling(fence);
	dma_resv_iter_end(&cursor);

	err = dma_resv_wait_timeout(xe_vm_resv(vm),
				    DMA_RESV_USAGE_BOOKKEEP,
				    false, MAX_SCHEDULE_TIMEOUT);
	XE_WARN_ON(err <= 0);

	if (xe_vm_in_fault_mode(vm)) {
		err = xe_vm_invalidate_vma(vma);
		XE_WARN_ON(err);
	}

	trace_xe_vma_userptr_invalidate_complete(vma);

	return true;
}

static const struct mmu_interval_notifier_ops vma_userptr_notifier_ops = {
	.invalidate = vma_userptr_invalidate,
};

704int xe_vm_userptr_pin(struct xe_vm *vm)
705{
706 struct xe_vma *vma, *next;
707 int err = 0;
708 LIST_HEAD(tmp_evict);
709
710 lockdep_assert_held_write(&vm->lock);
711
712 /* Collect invalidated userptrs */
713 spin_lock(&vm->userptr.invalidated_lock);
714 list_for_each_entry_safe(vma, next, &vm->userptr.invalidated,
715 userptr.invalidate_link) {
716 list_del_init(&vma->userptr.invalidate_link);
24f947d5
TH
717 list_move_tail(&vma->combined_links.userptr,
718 &vm->userptr.repin_list);
dd08ebf6
MB
719 }
720 spin_unlock(&vm->userptr.invalidated_lock);
721
722 /* Pin and move to temporary list */
1655c893
MB
723 list_for_each_entry_safe(vma, next, &vm->userptr.repin_list,
724 combined_links.userptr) {
dd08ebf6
MB
725 err = xe_vma_userptr_pin_pages(vma);
726 if (err < 0)
24f947d5 727 return err;
dd08ebf6 728
24f947d5 729 list_move_tail(&vma->combined_links.userptr, &vm->rebind_list);
dd08ebf6
MB
730 }
731
dd08ebf6 732 return 0;
dd08ebf6
MB
733}
734
735/**
736 * xe_vm_userptr_check_repin() - Check whether the VM might have userptrs
737 * that need repinning.
738 * @vm: The VM.
739 *
740 * This function does an advisory check for whether the VM has userptrs that
741 * need repinning.
742 *
743 * Return: 0 if there are no indications of userptrs needing repinning,
744 * -EAGAIN if there are.
745 */
746int xe_vm_userptr_check_repin(struct xe_vm *vm)
747{
748 return (list_empty_careful(&vm->userptr.repin_list) &&
749 list_empty_careful(&vm->userptr.invalidated)) ? 0 : -EAGAIN;
750}
751
752static struct dma_fence *
9b9529ce 753xe_vm_bind_vma(struct xe_vma *vma, struct xe_exec_queue *q,
b06d47be
MB
754 struct xe_sync_entry *syncs, u32 num_syncs,
755 bool first_op, bool last_op);
dd08ebf6
MB
756
757struct dma_fence *xe_vm_rebind(struct xe_vm *vm, bool rebind_worker)
758{
759 struct dma_fence *fence = NULL;
760 struct xe_vma *vma, *next;
761
762 lockdep_assert_held(&vm->lock);
fdb6a053 763 if (xe_vm_in_lr_mode(vm) && !rebind_worker)
dd08ebf6
MB
764 return NULL;
765
766 xe_vm_assert_held(vm);
1655c893
MB
767 list_for_each_entry_safe(vma, next, &vm->rebind_list,
768 combined_links.rebind) {
c73acc1e 769 xe_assert(vm->xe, vma->tile_present);
dd08ebf6 770
1655c893 771 list_del_init(&vma->combined_links.rebind);
dd08ebf6
MB
772 dma_fence_put(fence);
773 if (rebind_worker)
774 trace_xe_vma_rebind_worker(vma);
775 else
776 trace_xe_vma_rebind_exec(vma);
b06d47be 777 fence = xe_vm_bind_vma(vma, NULL, NULL, 0, false, false);
dd08ebf6
MB
778 if (IS_ERR(fence))
779 return fence;
780 }
781
782 return fence;
783}
784
3b97e3b2
MB
785#define VMA_CREATE_FLAG_READ_ONLY BIT(0)
786#define VMA_CREATE_FLAG_IS_NULL BIT(1)
787
dd08ebf6
MB
788static struct xe_vma *xe_vma_create(struct xe_vm *vm,
789 struct xe_bo *bo,
790 u64 bo_offset_or_userptr,
791 u64 start, u64 end,
3b97e3b2 792 u16 pat_index, unsigned int flags)
dd08ebf6
MB
793{
794 struct xe_vma *vma;
876611c2 795 struct xe_tile *tile;
dd08ebf6 796 u8 id;
3b97e3b2
MB
797 bool read_only = (flags & VMA_CREATE_FLAG_READ_ONLY);
798 bool is_null = (flags & VMA_CREATE_FLAG_IS_NULL);
dd08ebf6 799
c73acc1e
FD
800 xe_assert(vm->xe, start < end);
801 xe_assert(vm->xe, end < vm->size);
dd08ebf6 802
a4cc60a5
MB
803 if (!bo && !is_null) /* userptr */
804 vma = kzalloc(sizeof(*vma), GFP_KERNEL);
805 else
806 vma = kzalloc(sizeof(*vma) - sizeof(struct xe_userptr),
807 GFP_KERNEL);
dd08ebf6
MB
808 if (!vma) {
809 vma = ERR_PTR(-ENOMEM);
810 return vma;
811 }
812
1655c893 813 INIT_LIST_HEAD(&vma->combined_links.rebind);
dd08ebf6 814
b06d47be
MB
815 INIT_LIST_HEAD(&vma->gpuva.gem.entry);
816 vma->gpuva.vm = &vm->gpuvm;
817 vma->gpuva.va.addr = start;
818 vma->gpuva.va.range = end - start + 1;
dd08ebf6 819 if (read_only)
b06d47be 820 vma->gpuva.flags |= XE_VMA_READ_ONLY;
37430402 821 if (is_null)
b06d47be 822 vma->gpuva.flags |= DRM_GPUVA_SPARSE;
dd08ebf6 823
cad4a0d6
RV
824 for_each_tile(tile, vm->xe, id)
825 vma->tile_mask |= 0x1 << id;
dd08ebf6 826
9be79251 827 if (GRAPHICS_VER(vm->xe) >= 20 || vm->xe->info.platform == XE_PVC)
b06d47be 828 vma->gpuva.flags |= XE_VMA_ATOMIC_PTE_BIT;
dd08ebf6 829
e1fbc4f1
MA
830 vma->pat_index = pat_index;
831
dd08ebf6 832 if (bo) {
b06d47be
MB
833 struct drm_gpuvm_bo *vm_bo;
834
dd08ebf6 835 xe_bo_assert_held(bo);
b06d47be
MB
836
837 vm_bo = drm_gpuvm_bo_obtain(vma->gpuva.vm, &bo->ttm.base);
838 if (IS_ERR(vm_bo)) {
839 kfree(vma);
840 return ERR_CAST(vm_bo);
841 }
842
24f947d5 843 drm_gpuvm_bo_extobj_add(vm_bo);
b06d47be
MB
844 drm_gem_object_get(&bo->ttm.base);
845 vma->gpuva.gem.obj = &bo->ttm.base;
846 vma->gpuva.gem.offset = bo_offset_or_userptr;
847 drm_gpuva_link(&vma->gpuva, vm_bo);
848 drm_gpuvm_bo_put(vm_bo);
37430402
MB
849 } else /* userptr or null */ {
850 if (!is_null) {
851 u64 size = end - start + 1;
852 int err;
dd08ebf6 853
a4cc60a5 854 INIT_LIST_HEAD(&vma->userptr.invalidate_link);
b06d47be 855 vma->gpuva.gem.offset = bo_offset_or_userptr;
dd08ebf6 856
37430402
MB
857 err = mmu_interval_notifier_insert(&vma->userptr.notifier,
858 current->mm,
21ed3327 859 xe_vma_userptr(vma), size,
37430402
MB
860 &vma_userptr_notifier_ops);
861 if (err) {
862 kfree(vma);
863 vma = ERR_PTR(err);
864 return vma;
865 }
866
867 vma->userptr.notifier_seq = LONG_MAX;
dd08ebf6
MB
868 }
869
dd08ebf6
MB
870 xe_vm_get(vm);
871 }
872
873 return vma;
874}
875
static void xe_vma_destroy_late(struct xe_vma *vma)
{
	struct xe_vm *vm = xe_vma_vm(vma);
	struct xe_device *xe = vm->xe;
	bool read_only = xe_vma_read_only(vma);

	if (xe_vma_is_userptr(vma)) {
		if (vma->userptr.sg) {
			dma_unmap_sgtable(xe->drm.dev,
					  vma->userptr.sg,
					  read_only ? DMA_TO_DEVICE :
					  DMA_BIDIRECTIONAL, 0);
			sg_free_table(vma->userptr.sg);
			vma->userptr.sg = NULL;
		}

		/*
		 * Since userptr pages are not pinned, we can't remove
		 * the notifier until we're sure the GPU is not accessing
		 * them anymore.
		 */
		mmu_interval_notifier_remove(&vma->userptr.notifier);
		xe_vm_put(vm);
	} else if (xe_vma_is_null(vma)) {
		xe_vm_put(vm);
	} else {
		xe_bo_put(xe_vma_bo(vma));
	}

	kfree(vma);
}

908static void vma_destroy_work_func(struct work_struct *w)
909{
910 struct xe_vma *vma =
911 container_of(w, struct xe_vma, destroy_work);
912
913 xe_vma_destroy_late(vma);
914}
915
dd08ebf6
MB
916static void vma_destroy_cb(struct dma_fence *fence,
917 struct dma_fence_cb *cb)
918{
919 struct xe_vma *vma = container_of(cb, struct xe_vma, destroy_cb);
920
921 INIT_WORK(&vma->destroy_work, vma_destroy_work_func);
922 queue_work(system_unbound_wq, &vma->destroy_work);
923}
924
925static void xe_vma_destroy(struct xe_vma *vma, struct dma_fence *fence)
926{
21ed3327 927 struct xe_vm *vm = xe_vma_vm(vma);
dd08ebf6
MB
928
929 lockdep_assert_held_write(&vm->lock);
c73acc1e 930 xe_assert(vm->xe, list_empty(&vma->combined_links.destroy));
dd08ebf6
MB
931
932 if (xe_vma_is_userptr(vma)) {
c73acc1e 933 xe_assert(vm->xe, vma->gpuva.flags & XE_VMA_DESTROYED);
b06d47be 934
dd08ebf6 935 spin_lock(&vm->userptr.invalidated_lock);
3daf694c 936 list_del(&vma->userptr.invalidate_link);
dd08ebf6 937 spin_unlock(&vm->userptr.invalidated_lock);
37430402 938 } else if (!xe_vma_is_null(vma)) {
21ed3327 939 xe_bo_assert_held(xe_vma_bo(vma));
dd08ebf6 940
b06d47be 941 drm_gpuva_unlink(&vma->gpuva);
dd08ebf6
MB
942 }
943
944 xe_vm_assert_held(vm);
dd08ebf6
MB
945 if (fence) {
946 int ret = dma_fence_add_callback(fence, &vma->destroy_cb,
947 vma_destroy_cb);
948
949 if (ret) {
950 XE_WARN_ON(ret != -ENOENT);
951 xe_vma_destroy_late(vma);
952 }
953 } else {
954 xe_vma_destroy_late(vma);
955 }
956}
957
/**
 * xe_vm_prepare_vma() - drm_exec utility to lock a vma
 * @exec: The drm_exec object we're currently locking for.
 * @vma: The vma for which we want to lock the vm resv and any attached
 * object's resv.
 * @num_shared: The number of dma-fence slots to pre-allocate in the
 * objects' reservation objects.
 *
 * Return: 0 on success, negative error code on error. In particular
 * may return -EDEADLK on WW transaction contention and -EINTR if
 * an interruptible wait is terminated by a signal.
 */
int xe_vm_prepare_vma(struct drm_exec *exec, struct xe_vma *vma,
		      unsigned int num_shared)
{
	struct xe_vm *vm = xe_vma_vm(vma);
	struct xe_bo *bo = xe_vma_bo(vma);
	int err;

	XE_WARN_ON(!vm);
	err = drm_exec_prepare_obj(exec, xe_vm_obj(vm), num_shared);
	if (!err && bo && !bo->vm)
		err = drm_exec_prepare_obj(exec, &bo->ttm.base, num_shared);

	return err;
}

dd08ebf6
MB
985static void xe_vma_destroy_unlocked(struct xe_vma *vma)
986{
1f727182 987 struct drm_exec exec;
dd08ebf6
MB
988 int err;
989
1f727182
TH
990 drm_exec_init(&exec, 0);
991 drm_exec_until_all_locked(&exec) {
992 err = xe_vm_prepare_vma(&exec, vma, 0);
993 drm_exec_retry_on_contention(&exec);
994 if (XE_WARN_ON(err))
995 break;
dd08ebf6 996 }
dd08ebf6
MB
997
998 xe_vma_destroy(vma, NULL);
999
1f727182 1000 drm_exec_fini(&exec);
dd08ebf6
MB
1001}
1002
dd08ebf6 1003struct xe_vma *
b06d47be 1004xe_vm_find_overlapping_vma(struct xe_vm *vm, u64 start, u64 range)
dd08ebf6 1005{
b06d47be 1006 struct drm_gpuva *gpuva;
dd08ebf6 1007
9d858b69
MB
1008 lockdep_assert_held(&vm->lock);
1009
1010 if (xe_vm_is_closed_or_banned(vm))
dd08ebf6
MB
1011 return NULL;
1012
c73acc1e 1013 xe_assert(vm->xe, start + range <= vm->size);
dd08ebf6 1014
b06d47be 1015 gpuva = drm_gpuva_find_first(&vm->gpuvm, start, range);
dd08ebf6 1016
b06d47be 1017 return gpuva ? gpuva_to_vma(gpuva) : NULL;
dd08ebf6
MB
1018}
1019
b06d47be 1020static int xe_vm_insert_vma(struct xe_vm *vm, struct xe_vma *vma)
dd08ebf6 1021{
b06d47be
MB
1022 int err;
1023
c73acc1e 1024 xe_assert(vm->xe, xe_vma_vm(vma) == vm);
dd08ebf6
MB
1025 lockdep_assert_held(&vm->lock);
1026
b06d47be
MB
1027 err = drm_gpuva_insert(&vm->gpuvm, &vma->gpuva);
1028 XE_WARN_ON(err); /* Shouldn't be possible */
1029
1030 return err;
dd08ebf6
MB
1031}
1032
1033static void xe_vm_remove_vma(struct xe_vm *vm, struct xe_vma *vma)
1034{
c73acc1e 1035 xe_assert(vm->xe, xe_vma_vm(vma) == vm);
dd08ebf6
MB
1036 lockdep_assert_held(&vm->lock);
1037
b06d47be 1038 drm_gpuva_remove(&vma->gpuva);
dd08ebf6
MB
1039 if (vm->usm.last_fault_vma == vma)
1040 vm->usm.last_fault_vma = NULL;
1041}
1042
b06d47be
MB
1043static struct drm_gpuva_op *xe_vm_op_alloc(void)
1044{
1045 struct xe_vma_op *op;
1046
1047 op = kzalloc(sizeof(*op), GFP_KERNEL);
1048
1049 if (unlikely(!op))
1050 return NULL;
1051
1052 return &op->base;
1053}
1054
1055static void xe_vm_free(struct drm_gpuvm *gpuvm);
1056
1057static struct drm_gpuvm_ops gpuvm_ops = {
1058 .op_alloc = xe_vm_op_alloc,
24f947d5 1059 .vm_bo_validate = xe_gpuvm_validate,
b06d47be
MB
1060 .vm_free = xe_vm_free,
1061};
1062
e814389f 1063static u64 pde_encode_pat_index(struct xe_device *xe, u16 pat_index)
0e5e77bd 1064{
fcd75139 1065 u64 pte = 0;
0e5e77bd 1066
fcd75139
LDM
1067 if (pat_index & BIT(0))
1068 pte |= XE_PPGTT_PTE_PAT0;
0e5e77bd 1069
fcd75139
LDM
1070 if (pat_index & BIT(1))
1071 pte |= XE_PPGTT_PTE_PAT1;
1072
1073 return pte;
0e5e77bd
LDM
1074}
1075
bf6d941c
MA
1076static u64 pte_encode_pat_index(struct xe_device *xe, u16 pat_index,
1077 u32 pt_level)
0e5e77bd 1078{
fcd75139
LDM
1079 u64 pte = 0;
1080
1081 if (pat_index & BIT(0))
1082 pte |= XE_PPGTT_PTE_PAT0;
1083
1084 if (pat_index & BIT(1))
1085 pte |= XE_PPGTT_PTE_PAT1;
1086
bf6d941c
MA
1087 if (pat_index & BIT(2)) {
1088 if (pt_level)
1089 pte |= XE_PPGTT_PDE_PDPE_PAT2;
1090 else
1091 pte |= XE_PPGTT_PTE_PAT2;
1092 }
fcd75139
LDM
1093
1094 if (pat_index & BIT(3))
1095 pte |= XELPG_PPGTT_PTE_PAT3;
1096
5803bdc8
LDM
1097 if (pat_index & (BIT(4)))
1098 pte |= XE2_PPGTT_PTE_PAT4;
1099
fcd75139 1100 return pte;
0e5e77bd
LDM
1101}
1102
1103static u64 pte_encode_ps(u32 pt_level)
1104{
e84d716d 1105 XE_WARN_ON(pt_level > MAX_HUGEPTE_LEVEL);
0e5e77bd
LDM
1106
1107 if (pt_level == 1)
1108 return XE_PDE_PS_2M;
1109 else if (pt_level == 2)
1110 return XE_PDPE_PS_1G;
1111
1112 return 0;
1113}
1114
1115static u64 xelp_pde_encode_bo(struct xe_bo *bo, u64 bo_offset,
e814389f 1116 const u16 pat_index)
0e5e77bd 1117{
fcd75139 1118 struct xe_device *xe = xe_bo_device(bo);
0e5e77bd
LDM
1119 u64 pde;
1120
1121 pde = xe_bo_addr(bo, bo_offset, XE_PAGE_SIZE);
1122 pde |= XE_PAGE_PRESENT | XE_PAGE_RW;
e814389f 1123 pde |= pde_encode_pat_index(xe, pat_index);
0e5e77bd
LDM
1124
1125 return pde;
1126}
1127
1128static u64 xelp_pte_encode_bo(struct xe_bo *bo, u64 bo_offset,
e814389f 1129 u16 pat_index, u32 pt_level)
0e5e77bd 1130{
fcd75139 1131 struct xe_device *xe = xe_bo_device(bo);
0e5e77bd
LDM
1132 u64 pte;
1133
1134 pte = xe_bo_addr(bo, bo_offset, XE_PAGE_SIZE);
1135 pte |= XE_PAGE_PRESENT | XE_PAGE_RW;
bf6d941c 1136 pte |= pte_encode_pat_index(xe, pat_index, pt_level);
0e5e77bd
LDM
1137 pte |= pte_encode_ps(pt_level);
1138
1139 if (xe_bo_is_vram(bo) || xe_bo_is_stolen_devmem(bo))
1140 pte |= XE_PPGTT_PTE_DM;
1141
1142 return pte;
1143}
1144
1145static u64 xelp_pte_encode_vma(u64 pte, struct xe_vma *vma,
e814389f 1146 u16 pat_index, u32 pt_level)
0e5e77bd 1147{
fcd75139
LDM
1148 struct xe_device *xe = xe_vma_vm(vma)->xe;
1149
0e5e77bd
LDM
1150 pte |= XE_PAGE_PRESENT;
1151
1152 if (likely(!xe_vma_read_only(vma)))
1153 pte |= XE_PAGE_RW;
1154
bf6d941c 1155 pte |= pte_encode_pat_index(xe, pat_index, pt_level);
0e5e77bd
LDM
1156 pte |= pte_encode_ps(pt_level);
1157
1158 if (unlikely(xe_vma_is_null(vma)))
1159 pte |= XE_PTE_NULL;
1160
1161 return pte;
1162}
1163
fcd75139 1164static u64 xelp_pte_encode_addr(struct xe_device *xe, u64 addr,
e814389f 1165 u16 pat_index,
23c8495e
LDM
1166 u32 pt_level, bool devmem, u64 flags)
1167{
1168 u64 pte;
1169
1170 /* Avoid passing random bits directly as flags */
28523083 1171 xe_assert(xe, !(flags & ~XE_PTE_PS64));
23c8495e
LDM
1172
1173 pte = addr;
1174 pte |= XE_PAGE_PRESENT | XE_PAGE_RW;
bf6d941c 1175 pte |= pte_encode_pat_index(xe, pat_index, pt_level);
23c8495e
LDM
1176 pte |= pte_encode_ps(pt_level);
1177
1178 if (devmem)
1179 pte |= XE_PPGTT_PTE_DM;
1180
1181 pte |= flags;
1182
1183 return pte;
1184}
1185
0e5e77bd
LDM
1186static const struct xe_pt_ops xelp_pt_ops = {
1187 .pte_encode_bo = xelp_pte_encode_bo,
1188 .pte_encode_vma = xelp_pte_encode_vma,
23c8495e 1189 .pte_encode_addr = xelp_pte_encode_addr,
0e5e77bd
LDM
1190 .pde_encode_bo = xelp_pde_encode_bo,
1191};
1192
dd08ebf6
MB
1193static void vm_destroy_work_func(struct work_struct *w);
1194
06951c2e
TH
1195/**
1196 * xe_vm_create_scratch() - Setup a scratch memory pagetable tree for the
1197 * given tile and vm.
1198 * @xe: xe device.
1199 * @tile: tile to set up for.
1200 * @vm: vm to set up for.
1201 *
1202 * Sets up a pagetable tree with one page-table per level and a single
1203 * leaf PTE. All pagetable entries point to the single page-table or,
1204 * for MAX_HUGEPTE_LEVEL, a NULL huge PTE returning 0 on read and
1205 * writes become NOPs.
1206 *
1207 * Return: 0 on success, negative error code on error.
1208 */
1209static int xe_vm_create_scratch(struct xe_device *xe, struct xe_tile *tile,
1210 struct xe_vm *vm)
1211{
1212 u8 id = tile->id;
1213 int i;
1214
1215 for (i = MAX_HUGEPTE_LEVEL; i < vm->pt_root[id]->level; i++) {
1216 vm->scratch_pt[id][i] = xe_pt_create(vm, tile, i);
1217 if (IS_ERR(vm->scratch_pt[id][i]))
1218 return PTR_ERR(vm->scratch_pt[id][i]);
1219
1220 xe_pt_populate_empty(tile, vm, vm->scratch_pt[id][i]);
1221 }
1222
1223 return 0;
1224}
1225
1226static void xe_vm_free_scratch(struct xe_vm *vm)
1227{
1228 struct xe_tile *tile;
1229 u8 id;
1230
1231 if (!xe_vm_has_scratch(vm))
1232 return;
1233
1234 for_each_tile(tile, vm->xe, id) {
1235 u32 i;
1236
1237 if (!vm->pt_root[id])
1238 continue;
1239
1240 for (i = MAX_HUGEPTE_LEVEL; i < vm->pt_root[id]->level; ++i)
1241 if (vm->scratch_pt[id][i])
1242 xe_pt_destroy(vm->scratch_pt[id][i], vm->flags, NULL);
1243 }
1244}
1245
dd08ebf6
MB
1246struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags)
1247{
b06d47be 1248 struct drm_gem_object *vm_resv_obj;
dd08ebf6 1249 struct xe_vm *vm;
b06d47be 1250 int err, number_tiles = 0;
876611c2 1251 struct xe_tile *tile;
dd08ebf6
MB
1252 u8 id;
1253
1254 vm = kzalloc(sizeof(*vm), GFP_KERNEL);
1255 if (!vm)
1256 return ERR_PTR(-ENOMEM);
1257
1258 vm->xe = xe;
dd08ebf6 1259
e9bb0891 1260 vm->size = 1ull << xe->info.va_bits;
dd08ebf6 1261
dd08ebf6
MB
1262 vm->flags = flags;
1263
1264 init_rwsem(&vm->lock);
1265
1266 INIT_LIST_HEAD(&vm->rebind_list);
1267
1268 INIT_LIST_HEAD(&vm->userptr.repin_list);
1269 INIT_LIST_HEAD(&vm->userptr.invalidated);
1270 init_rwsem(&vm->userptr.notifier_lock);
1271 spin_lock_init(&vm->userptr.invalidated_lock);
1272
dd08ebf6
MB
1273 INIT_WORK(&vm->destroy_work, vm_destroy_work_func);
1274
9b9529ce 1275 INIT_LIST_HEAD(&vm->preempt.exec_queues);
dd08ebf6
MB
1276 vm->preempt.min_run_period_ms = 10; /* FIXME: Wire up to uAPI */
1277
fd84041d
MB
1278 for_each_tile(tile, xe, id)
1279 xe_range_fence_tree_init(&vm->rftree[id]);
1280
0e5e77bd
LDM
1281 vm->pt_ops = &xelp_pt_ops;
1282
2d30332a 1283 if (!(flags & XE_VM_FLAG_MIGRATION))
dd08ebf6 1284 xe_device_mem_access_get(xe);
dd08ebf6 1285
b06d47be
MB
1286 vm_resv_obj = drm_gpuvm_resv_object_alloc(&xe->drm);
1287 if (!vm_resv_obj) {
1288 err = -ENOMEM;
1289 goto err_no_resv;
1290 }
1291
35705e32
TH
1292 drm_gpuvm_init(&vm->gpuvm, "Xe VM", DRM_GPUVM_RESV_PROTECTED, &xe->drm,
1293 vm_resv_obj, 0, vm->size, 0, 0, &gpuvm_ops);
b06d47be
MB
1294
1295 drm_gem_object_put(vm_resv_obj);
1296
1297 err = dma_resv_lock_interruptible(xe_vm_resv(vm), NULL);
dd08ebf6 1298 if (err)
b06d47be 1299 goto err_close;
dd08ebf6
MB
1300
1301 if (IS_DGFX(xe) && xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K)
0d39b6da 1302 vm->flags |= XE_VM_FLAG_64K;
dd08ebf6 1303
876611c2 1304 for_each_tile(tile, xe, id) {
dd08ebf6 1305 if (flags & XE_VM_FLAG_MIGRATION &&
0d39b6da 1306 tile->id != XE_VM_FLAG_TILE_ID(flags))
dd08ebf6
MB
1307 continue;
1308
876611c2 1309 vm->pt_root[id] = xe_pt_create(vm, tile, xe->info.vm_max_level);
dd08ebf6
MB
1310 if (IS_ERR(vm->pt_root[id])) {
1311 err = PTR_ERR(vm->pt_root[id]);
1312 vm->pt_root[id] = NULL;
b06d47be 1313 goto err_unlock_close;
dd08ebf6
MB
1314 }
1315 }
1316
06951c2e 1317 if (xe_vm_has_scratch(vm)) {
876611c2 1318 for_each_tile(tile, xe, id) {
dd08ebf6
MB
1319 if (!vm->pt_root[id])
1320 continue;
1321
06951c2e 1322 err = xe_vm_create_scratch(xe, tile, vm);
dd08ebf6 1323 if (err)
b06d47be 1324 goto err_unlock_close;
dd08ebf6 1325 }
85dbfe47 1326 vm->batch_invalidate_tlb = true;
dd08ebf6
MB
1327 }
1328
fdb6a053 1329 if (flags & XE_VM_FLAG_LR_MODE) {
dd08ebf6 1330 INIT_WORK(&vm->preempt.rebind_work, preempt_rebind_work_func);
fdb6a053 1331 vm->flags |= XE_VM_FLAG_LR_MODE;
85dbfe47 1332 vm->batch_invalidate_tlb = false;
dd08ebf6
MB
1333 }
1334
dd08ebf6 1335 /* Fill pt_root after allocating scratch tables */
876611c2 1336 for_each_tile(tile, xe, id) {
dd08ebf6
MB
1337 if (!vm->pt_root[id])
1338 continue;
1339
876611c2 1340 xe_pt_populate_empty(tile, vm, vm->pt_root[id]);
dd08ebf6 1341 }
b06d47be 1342 dma_resv_unlock(xe_vm_resv(vm));
dd08ebf6
MB
1343
1344 /* Kernel migration VM shouldn't have a circular loop.. */
1345 if (!(flags & XE_VM_FLAG_MIGRATION)) {
876611c2 1346 for_each_tile(tile, xe, id) {
f6929e80 1347 struct xe_gt *gt = tile->primary_gt;
dd08ebf6 1348 struct xe_vm *migrate_vm;
9b9529ce 1349 struct xe_exec_queue *q;
d3d76739 1350 u32 create_flags = EXEC_QUEUE_FLAG_VM;
dd08ebf6
MB
1351
1352 if (!vm->pt_root[id])
1353 continue;
1354
08dea767 1355 migrate_vm = xe_migrate_get_vm(tile->migrate);
9b9529ce
FD
1356 q = xe_exec_queue_create_class(xe, gt, migrate_vm,
1357 XE_ENGINE_CLASS_COPY,
f3e9b1f4 1358 create_flags);
dd08ebf6 1359 xe_vm_put(migrate_vm);
9b9529ce
FD
1360 if (IS_ERR(q)) {
1361 err = PTR_ERR(q);
b06d47be 1362 goto err_close;
dd08ebf6 1363 }
9b9529ce 1364 vm->q[id] = q;
876611c2 1365 number_tiles++;
dd08ebf6
MB
1366 }
1367 }
1368
876611c2 1369 if (number_tiles > 1)
dd08ebf6
MB
1370 vm->composite_fence_ctx = dma_fence_context_alloc(1);
1371
1372 mutex_lock(&xe->usm.lock);
1373 if (flags & XE_VM_FLAG_FAULT_MODE)
1374 xe->usm.num_vm_in_fault_mode++;
1375 else if (!(flags & XE_VM_FLAG_MIGRATION))
1376 xe->usm.num_vm_in_non_fault_mode++;
1377 mutex_unlock(&xe->usm.lock);
1378
1379 trace_xe_vm_create(vm);
1380
1381 return vm;
1382
b06d47be
MB
1383err_unlock_close:
1384 dma_resv_unlock(xe_vm_resv(vm));
1385err_close:
1386 xe_vm_close_and_put(vm);
1387 return ERR_PTR(err);
dd08ebf6 1388
b06d47be 1389err_no_resv:
fd84041d
MB
1390 for_each_tile(tile, xe, id)
1391 xe_range_fence_tree_fini(&vm->rftree[id]);
dd08ebf6 1392 kfree(vm);
2d30332a 1393 if (!(flags & XE_VM_FLAG_MIGRATION))
dd08ebf6 1394 xe_device_mem_access_put(xe);
dd08ebf6
MB
1395 return ERR_PTR(err);
1396}
1397
9d858b69
MB
1398static void xe_vm_close(struct xe_vm *vm)
1399{
1400 down_write(&vm->lock);
1401 vm->size = 0;
1402 up_write(&vm->lock);
1403}
1404
dd08ebf6
MB
1405void xe_vm_close_and_put(struct xe_vm *vm)
1406{
b06d47be 1407 LIST_HEAD(contested);
dd08ebf6 1408 struct xe_device *xe = vm->xe;
876611c2 1409 struct xe_tile *tile;
b06d47be
MB
1410 struct xe_vma *vma, *next_vma;
1411 struct drm_gpuva *gpuva, *next;
dd08ebf6
MB
1412 u8 id;
1413
c73acc1e 1414 xe_assert(xe, !vm->preempt.num_exec_queues);
dd08ebf6 1415
9d858b69 1416 xe_vm_close(vm);
fdb6a053 1417 if (xe_vm_in_preempt_fence_mode(vm))
dd08ebf6
MB
1418 flush_work(&vm->preempt.rebind_work);
1419
e669f10c
MB
1420 down_write(&vm->lock);
1421 for_each_tile(tile, xe, id) {
1422 if (vm->q[id])
1423 xe_exec_queue_last_fence_put(vm->q[id], vm);
1424 }
1425 up_write(&vm->lock);
1426
876611c2 1427 for_each_tile(tile, xe, id) {
9b9529ce
FD
1428 if (vm->q[id]) {
1429 xe_exec_queue_kill(vm->q[id]);
1430 xe_exec_queue_put(vm->q[id]);
1431 vm->q[id] = NULL;
dd08ebf6
MB
1432 }
1433 }
1434
1435 down_write(&vm->lock);
d00e9cc2 1436 xe_vm_lock(vm, false);
b06d47be
MB
1437 drm_gpuvm_for_each_va_safe(gpuva, next, &vm->gpuvm) {
1438 vma = gpuva_to_vma(gpuva);
dd08ebf6 1439
37430402 1440 if (xe_vma_has_no_bo(vma)) {
dd08ebf6 1441 down_read(&vm->userptr.notifier_lock);
b06d47be 1442 vma->gpuva.flags |= XE_VMA_DESTROYED;
dd08ebf6
MB
1443 up_read(&vm->userptr.notifier_lock);
1444 }
1445
b06d47be 1446 xe_vm_remove_vma(vm, vma);
dd08ebf6
MB
1447
1448 /* easy case, remove from VMA? */
21ed3327 1449 if (xe_vma_has_no_bo(vma) || xe_vma_bo(vma)->vm) {
1655c893 1450 list_del_init(&vma->combined_links.rebind);
dd08ebf6
MB
1451 xe_vma_destroy(vma, NULL);
1452 continue;
1453 }
1454
1655c893 1455 list_move_tail(&vma->combined_links.destroy, &contested);
ca8656a2 1456 vma->gpuva.flags |= XE_VMA_DESTROYED;
dd08ebf6
MB
1457 }
1458
1459 /*
1460 * All vm operations will add shared fences to resv.
1461 * The only exception is eviction for a shared object,
1462 * but even so, the unbind when evicted would still
1463 * install a fence to resv. Hence it's safe to
1464 * destroy the pagetables immediately.
1465 */
06951c2e
TH
1466 xe_vm_free_scratch(vm);
1467
876611c2 1468 for_each_tile(tile, xe, id) {
b06d47be
MB
1469 if (vm->pt_root[id]) {
1470 xe_pt_destroy(vm->pt_root[id], vm->flags, NULL);
1471 vm->pt_root[id] = NULL;
1472 }
dd08ebf6 1473 }
d00e9cc2 1474 xe_vm_unlock(vm);
dd08ebf6 1475
b06d47be
MB
1476 /*
1477 * VM is now dead, cannot re-add nodes to vm->vmas if it's NULL
1478 * Since we hold a refcount to the bo, we can remove and free
1479 * the members safely without locking.
1480 */
1655c893
MB
1481 list_for_each_entry_safe(vma, next_vma, &contested,
1482 combined_links.destroy) {
1483 list_del_init(&vma->combined_links.destroy);
b06d47be 1484 xe_vma_destroy_unlocked(vma);
dd08ebf6
MB
1485 }
1486
dd08ebf6
MB
1487 up_write(&vm->lock);
1488
cf667aec
MB
1489 mutex_lock(&xe->usm.lock);
1490 if (vm->flags & XE_VM_FLAG_FAULT_MODE)
1491 xe->usm.num_vm_in_fault_mode--;
1492 else if (!(vm->flags & XE_VM_FLAG_MIGRATION))
1493 xe->usm.num_vm_in_non_fault_mode--;
1494 mutex_unlock(&xe->usm.lock);
1495
fd84041d
MB
1496 for_each_tile(tile, xe, id)
1497 xe_range_fence_tree_fini(&vm->rftree[id]);
1498
dd08ebf6
MB
1499 xe_vm_put(vm);
1500}
1501
1502static void vm_destroy_work_func(struct work_struct *w)
1503{
1504 struct xe_vm *vm =
1505 container_of(w, struct xe_vm, destroy_work);
dd08ebf6 1506 struct xe_device *xe = vm->xe;
876611c2 1507 struct xe_tile *tile;
dd08ebf6
MB
1508 u8 id;
1509 void *lookup;
1510
1511 /* xe_vm_close_and_put was not called? */
c73acc1e 1512 xe_assert(xe, !vm->size);
dd08ebf6
MB
1513
1514 if (!(vm->flags & XE_VM_FLAG_MIGRATION)) {
1515 xe_device_mem_access_put(xe);
dd08ebf6 1516
06d5ae90 1517 if (xe->info.has_asid && vm->usm.asid) {
a12d9216
MB
1518 mutex_lock(&xe->usm.lock);
1519 lookup = xa_erase(&xe->usm.asid_to_vm, vm->usm.asid);
c73acc1e 1520 xe_assert(xe, lookup == vm);
a12d9216
MB
1521 mutex_unlock(&xe->usm.lock);
1522 }
dd08ebf6
MB
1523 }
1524
b06d47be
MB
1525 for_each_tile(tile, xe, id)
1526 XE_WARN_ON(vm->pt_root[id]);
dd08ebf6 1527
dd08ebf6
MB
1528 trace_xe_vm_free(vm);
1529 dma_fence_put(vm->rebind_fence);
dd08ebf6 1530 kfree(vm);
dd08ebf6
MB
1531}
1532
b06d47be 1533static void xe_vm_free(struct drm_gpuvm *gpuvm)
dd08ebf6 1534{
b06d47be 1535 struct xe_vm *vm = container_of(gpuvm, struct xe_vm, gpuvm);
dd08ebf6
MB
1536
1537 /* To destroy the VM we need to be able to sleep */
1538 queue_work(system_unbound_wq, &vm->destroy_work);
1539}
1540
1541struct xe_vm *xe_vm_lookup(struct xe_file *xef, u32 id)
1542{
1543 struct xe_vm *vm;
1544
1545 mutex_lock(&xef->vm.lock);
1546 vm = xa_load(&xef->vm.xa, id);
dd08ebf6
MB
1547 if (vm)
1548 xe_vm_get(vm);
5835dc7f 1549 mutex_unlock(&xef->vm.lock);
dd08ebf6
MB
1550
1551 return vm;
1552}
1553
876611c2 1554u64 xe_vm_pdp4_descriptor(struct xe_vm *vm, struct xe_tile *tile)
dd08ebf6 1555{
0e5e77bd 1556 return vm->pt_ops->pde_encode_bo(vm->pt_root[tile->id]->bo, 0,
e814389f 1557 tile_to_xe(tile)->pat.idx[XE_CACHE_WB]);
dd08ebf6
MB
1558}
1559
e669f10c
MB
1560static struct xe_exec_queue *
1561to_wait_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q)
1562{
1563 return q ? q : vm->q[0];
1564}
1565
dd08ebf6 1566static struct dma_fence *
9b9529ce 1567xe_vm_unbind_vma(struct xe_vma *vma, struct xe_exec_queue *q,
b06d47be
MB
1568 struct xe_sync_entry *syncs, u32 num_syncs,
1569 bool first_op, bool last_op)
dd08ebf6 1570{
e669f10c
MB
1571 struct xe_vm *vm = xe_vma_vm(vma);
1572 struct xe_exec_queue *wait_exec_queue = to_wait_exec_queue(vm, q);
876611c2 1573 struct xe_tile *tile;
dd08ebf6
MB
1574 struct dma_fence *fence = NULL;
1575 struct dma_fence **fences = NULL;
1576 struct dma_fence_array *cf = NULL;
dd08ebf6 1577 int cur_fence = 0, i;
63412a5a 1578 int number_tiles = hweight8(vma->tile_present);
dd08ebf6
MB
1579 int err;
1580 u8 id;
1581
1582 trace_xe_vma_unbind(vma);
1583
876611c2
MR
1584 if (number_tiles > 1) {
1585 fences = kmalloc_array(number_tiles, sizeof(*fences),
dd08ebf6
MB
1586 GFP_KERNEL);
1587 if (!fences)
1588 return ERR_PTR(-ENOMEM);
1589 }
1590
876611c2
MR
1591 for_each_tile(tile, vm->xe, id) {
1592 if (!(vma->tile_present & BIT(id)))
dd08ebf6
MB
1593 goto next;
1594
9a674bef
MB
1595 fence = __xe_pt_unbind_vma(tile, vma, q ? q : vm->q[id],
1596 first_op ? syncs : NULL,
b06d47be 1597 first_op ? num_syncs : 0);
dd08ebf6
MB
1598 if (IS_ERR(fence)) {
1599 err = PTR_ERR(fence);
1600 goto err_fences;
1601 }
1602
1603 if (fences)
1604 fences[cur_fence++] = fence;
1605
1606next:
9b9529ce
FD
1607 if (q && vm->pt_root[id] && !list_empty(&q->multi_gt_list))
1608 q = list_next_entry(q, multi_gt_list);
dd08ebf6
MB
1609 }
1610
1611 if (fences) {
876611c2 1612 cf = dma_fence_array_create(number_tiles, fences,
dd08ebf6
MB
1613 vm->composite_fence_ctx,
1614 vm->composite_fence_seqno++,
1615 false);
1616 if (!cf) {
1617 --vm->composite_fence_seqno;
1618 err = -ENOMEM;
1619 goto err_fences;
1620 }
1621 }
1622
04dfef5b
BW
1623 fence = cf ? &cf->base : !fence ?
1624 xe_exec_queue_last_fence_get(wait_exec_queue, vm) : fence;
b06d47be
MB
1625 if (last_op) {
1626 for (i = 0; i < num_syncs; i++)
04dfef5b 1627 xe_sync_entry_signal(&syncs[i], NULL, fence);
b06d47be 1628 }
dd08ebf6 1629
04dfef5b 1630 return fence;
dd08ebf6
MB
1631
1632err_fences:
1633 if (fences) {
f3e9b1f4 1634 while (cur_fence)
dd08ebf6 1635 dma_fence_put(fences[--cur_fence]);
dd08ebf6
MB
1636 kfree(fences);
1637 }
1638
1639 return ERR_PTR(err);
1640}
1641
1642static struct dma_fence *
9b9529ce 1643xe_vm_bind_vma(struct xe_vma *vma, struct xe_exec_queue *q,
b06d47be
MB
1644 struct xe_sync_entry *syncs, u32 num_syncs,
1645 bool first_op, bool last_op)
dd08ebf6 1646{
876611c2 1647 struct xe_tile *tile;
dd08ebf6
MB
1648 struct dma_fence *fence;
1649 struct dma_fence **fences = NULL;
1650 struct dma_fence_array *cf = NULL;
21ed3327 1651 struct xe_vm *vm = xe_vma_vm(vma);
dd08ebf6 1652 int cur_fence = 0, i;
63412a5a 1653 int number_tiles = hweight8(vma->tile_mask);
dd08ebf6
MB
1654 int err;
1655 u8 id;
1656
1657 trace_xe_vma_bind(vma);
1658
876611c2
MR
1659 if (number_tiles > 1) {
1660 fences = kmalloc_array(number_tiles, sizeof(*fences),
dd08ebf6
MB
1661 GFP_KERNEL);
1662 if (!fences)
1663 return ERR_PTR(-ENOMEM);
1664 }
1665
876611c2
MR
1666 for_each_tile(tile, vm->xe, id) {
1667 if (!(vma->tile_mask & BIT(id)))
dd08ebf6
MB
1668 goto next;
1669
9b9529ce 1670 fence = __xe_pt_bind_vma(tile, vma, q ? q : vm->q[id],
7ead3315 1671 first_op ? syncs : NULL,
b06d47be 1672 first_op ? num_syncs : 0,
876611c2 1673 vma->tile_present & BIT(id));
dd08ebf6
MB
1674 if (IS_ERR(fence)) {
1675 err = PTR_ERR(fence);
1676 goto err_fences;
1677 }
1678
1679 if (fences)
1680 fences[cur_fence++] = fence;
1681
1682next:
9b9529ce
FD
1683 if (q && vm->pt_root[id] && !list_empty(&q->multi_gt_list))
1684 q = list_next_entry(q, multi_gt_list);
dd08ebf6
MB
1685 }
1686
1687 if (fences) {
876611c2 1688 cf = dma_fence_array_create(number_tiles, fences,
dd08ebf6
MB
1689 vm->composite_fence_ctx,
1690 vm->composite_fence_seqno++,
1691 false);
1692 if (!cf) {
1693 --vm->composite_fence_seqno;
1694 err = -ENOMEM;
1695 goto err_fences;
1696 }
1697 }
1698
b06d47be
MB
1699 if (last_op) {
1700 for (i = 0; i < num_syncs; i++)
1701 xe_sync_entry_signal(&syncs[i], NULL,
1702 cf ? &cf->base : fence);
1703 }
dd08ebf6
MB
1704
1705 return cf ? &cf->base : fence;
1706
1707err_fences:
1708 if (fences) {
f3e9b1f4 1709 while (cur_fence)
dd08ebf6 1710 dma_fence_put(fences[--cur_fence]);
dd08ebf6
MB
1711 kfree(fences);
1712 }
1713
1714 return ERR_PTR(err);
1715}
1716
dd08ebf6 1717static int __xe_vm_bind(struct xe_vm *vm, struct xe_vma *vma,
9b9529ce 1718 struct xe_exec_queue *q, struct xe_sync_entry *syncs,
f3e9b1f4
MB
1719 u32 num_syncs, bool immediate, bool first_op,
1720 bool last_op)
dd08ebf6
MB
1721{
1722 struct dma_fence *fence;
e669f10c 1723 struct xe_exec_queue *wait_exec_queue = to_wait_exec_queue(vm, q);
dd08ebf6
MB
1724
1725 xe_vm_assert_held(vm);
1726
b06d47be 1727 if (immediate) {
9b9529ce 1728 fence = xe_vm_bind_vma(vma, q, syncs, num_syncs, first_op,
b06d47be
MB
1729 last_op);
1730 if (IS_ERR(fence))
1731 return PTR_ERR(fence);
1732 } else {
1733 int i;
1734
c73acc1e 1735 xe_assert(vm->xe, xe_vm_in_fault_mode(vm));
b06d47be 1736
e669f10c 1737 fence = xe_exec_queue_last_fence_get(wait_exec_queue, vm);
b06d47be
MB
1738 if (last_op) {
1739 for (i = 0; i < num_syncs; i++)
1740 xe_sync_entry_signal(&syncs[i], NULL, fence);
1741 }
1742 }
dd08ebf6 1743
e669f10c
MB
1744 if (last_op)
1745 xe_exec_queue_last_fence_set(wait_exec_queue, vm, fence);
dd08ebf6 1746 dma_fence_put(fence);
f3e9b1f4 1747
dd08ebf6
MB
1748 return 0;
1749}
1750
9b9529ce 1751static int xe_vm_bind(struct xe_vm *vm, struct xe_vma *vma, struct xe_exec_queue *q,
dd08ebf6 1752 struct xe_bo *bo, struct xe_sync_entry *syncs,
f3e9b1f4
MB
1753 u32 num_syncs, bool immediate, bool first_op,
1754 bool last_op)
dd08ebf6
MB
1755{
1756 int err;
1757
1758 xe_vm_assert_held(vm);
1759 xe_bo_assert_held(bo);
1760
b06d47be 1761 if (bo && immediate) {
dd08ebf6
MB
1762 err = xe_bo_validate(bo, vm, true);
1763 if (err)
1764 return err;
1765 }
1766
f3e9b1f4
MB
1767 return __xe_vm_bind(vm, vma, q, syncs, num_syncs, immediate, first_op,
1768 last_op);
dd08ebf6
MB
1769}
1770
1771static int xe_vm_unbind(struct xe_vm *vm, struct xe_vma *vma,
9b9529ce 1772 struct xe_exec_queue *q, struct xe_sync_entry *syncs,
f3e9b1f4 1773 u32 num_syncs, bool first_op, bool last_op)
dd08ebf6
MB
1774{
1775 struct dma_fence *fence;
e669f10c 1776 struct xe_exec_queue *wait_exec_queue = to_wait_exec_queue(vm, q);
dd08ebf6
MB
1777
1778 xe_vm_assert_held(vm);
21ed3327 1779 xe_bo_assert_held(xe_vma_bo(vma));
dd08ebf6 1780
9b9529ce 1781 fence = xe_vm_unbind_vma(vma, q, syncs, num_syncs, first_op, last_op);
dd08ebf6
MB
1782 if (IS_ERR(fence))
1783 return PTR_ERR(fence);
dd08ebf6
MB
1784
1785 xe_vma_destroy(vma, fence);
e669f10c
MB
1786 if (last_op)
1787 xe_exec_queue_last_fence_set(wait_exec_queue, vm, fence);
dd08ebf6
MB
1788 dma_fence_put(fence);
1789
1790 return 0;
1791}
1792
3ac4a789 1793#define ALL_DRM_XE_VM_CREATE_FLAGS (DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE | \
9329f066 1794 DRM_XE_VM_CREATE_FLAG_LR_MODE | \
3ac4a789 1795 DRM_XE_VM_CREATE_FLAG_FAULT_MODE)
dd08ebf6
MB
1796
1797int xe_vm_create_ioctl(struct drm_device *dev, void *data,
1798 struct drm_file *file)
1799{
1800 struct xe_device *xe = to_xe_device(dev);
1801 struct xe_file *xef = to_xe_file(file);
1802 struct drm_xe_vm_create *args = data;
2ff00c4f 1803 struct xe_tile *tile;
dd08ebf6
MB
1804 struct xe_vm *vm;
1805 u32 id, asid;
1806 int err;
1807 u32 flags = 0;
1808
7224788f
RV
1809 if (XE_IOCTL_DBG(xe, args->extensions))
1810 return -EINVAL;
1811
7f6c6e50 1812 if (XE_WA(xe_root_mmio_gt(xe), 14016763929))
3ac4a789 1813 args->flags |= DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE;
7f6c6e50 1814
3ac4a789 1815 if (XE_IOCTL_DBG(xe, args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE &&
5a92da34 1816 !xe->info.has_usm))
7f6c6e50
OZ
1817 return -EINVAL;
1818
b8c1ba83 1819 if (XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
1799c761
CS
1820 return -EINVAL;
1821
b8c1ba83 1822 if (XE_IOCTL_DBG(xe, args->flags & ~ALL_DRM_XE_VM_CREATE_FLAGS))
dd08ebf6
MB
1823 return -EINVAL;
1824
3ac4a789
FD
1825 if (XE_IOCTL_DBG(xe, args->flags & DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE &&
1826 args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE))
dd08ebf6
MB
1827 return -EINVAL;
1828
9329f066 1829 if (XE_IOCTL_DBG(xe, !(args->flags & DRM_XE_VM_CREATE_FLAG_LR_MODE) &&
3ac4a789 1830 args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE))
dd08ebf6
MB
1831 return -EINVAL;
1832
3ac4a789 1833 if (XE_IOCTL_DBG(xe, args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE &&
dd08ebf6
MB
1834 xe_device_in_non_fault_mode(xe)))
1835 return -EINVAL;
1836
3ac4a789 1837 if (XE_IOCTL_DBG(xe, !(args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE) &&
dd08ebf6
MB
1838 xe_device_in_fault_mode(xe)))
1839 return -EINVAL;
1840
f3e9b1f4
MB
1841 if (XE_IOCTL_DBG(xe, args->extensions))
1842 return -EINVAL;
1843
3ac4a789 1844 if (args->flags & DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE)
dd08ebf6 1845 flags |= XE_VM_FLAG_SCRATCH_PAGE;
9329f066 1846 if (args->flags & DRM_XE_VM_CREATE_FLAG_LR_MODE)
fdb6a053 1847 flags |= XE_VM_FLAG_LR_MODE;
3ac4a789 1848 if (args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE)
9329f066 1849 flags |= XE_VM_FLAG_FAULT_MODE;
dd08ebf6
MB
1850
1851 vm = xe_vm_create(xe, flags);
1852 if (IS_ERR(vm))
1853 return PTR_ERR(vm);
1854
dd08ebf6
MB
1855 mutex_lock(&xef->vm.lock);
1856 err = xa_alloc(&xef->vm.xa, &id, vm, xa_limit_32b, GFP_KERNEL);
1857 mutex_unlock(&xef->vm.lock);
1858 if (err) {
1859 xe_vm_close_and_put(vm);
1860 return err;
1861 }
1862
5669899e 1863 if (xe->info.has_asid) {
a12d9216
MB
1864 mutex_lock(&xe->usm.lock);
1865 err = xa_alloc_cyclic(&xe->usm.asid_to_vm, &asid, vm,
d2f51c50 1866 XA_LIMIT(1, XE_MAX_ASID - 1),
a12d9216
MB
1867 &xe->usm.next_asid, GFP_KERNEL);
1868 mutex_unlock(&xe->usm.lock);
d2f51c50 1869 if (err < 0) {
a12d9216
MB
1870 xe_vm_close_and_put(vm);
1871 return err;
1872 }
d2f51c50 1873 err = 0;
a12d9216 1874 vm->usm.asid = asid;
dd08ebf6 1875 }
dd08ebf6
MB
1876
1877 args->vm_id = id;
9e4e9761 1878 vm->xef = xef;
dd08ebf6 1879
2ff00c4f
TU
1880 /* Record BO memory for VM pagetable created against client */
1881 for_each_tile(tile, xe, id)
1882 if (vm->pt_root[id])
1883 xe_drm_client_add_bo(vm->xef->client, vm->pt_root[id]->bo);
1884
dd08ebf6
MB
1885#if IS_ENABLED(CONFIG_DRM_XE_DEBUG_MEM)
1886 /* Warning: Security issue - never enable by default */
58e19acf 1887 args->reserved[0] = xe_bo_main_addr(vm->pt_root[0]->bo, XE_PAGE_SIZE);
dd08ebf6
MB
1888#endif
1889
1890 return 0;
1891}
1892
1893int xe_vm_destroy_ioctl(struct drm_device *dev, void *data,
1894 struct drm_file *file)
1895{
1896 struct xe_device *xe = to_xe_device(dev);
1897 struct xe_file *xef = to_xe_file(file);
1898 struct drm_xe_vm_destroy *args = data;
1899 struct xe_vm *vm;
5835dc7f 1900 int err = 0;
dd08ebf6 1901
b8c1ba83
FD
1902 if (XE_IOCTL_DBG(xe, args->pad) ||
1903 XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
dd08ebf6
MB
1904 return -EINVAL;
1905
dd08ebf6 1906 mutex_lock(&xef->vm.lock);
5835dc7f 1907 vm = xa_load(&xef->vm.xa, args->vm_id);
b8c1ba83 1908 if (XE_IOCTL_DBG(xe, !vm))
5835dc7f 1909 err = -ENOENT;
9b9529ce 1910 else if (XE_IOCTL_DBG(xe, vm->preempt.num_exec_queues))
5835dc7f
TH
1911 err = -EBUSY;
1912 else
1913 xa_erase(&xef->vm.xa, args->vm_id);
dd08ebf6
MB
1914 mutex_unlock(&xef->vm.lock);
1915
5835dc7f
TH
1916 if (!err)
1917 xe_vm_close_and_put(vm);
dd08ebf6 1918
5835dc7f 1919 return err;
dd08ebf6
MB
1920}
1921
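/*
 * Translation table for the prefetch_mem_region_instance value supplied in a
 * bind op: index 0 selects system memory (TT), indices 1 and 2 the two
 * possible VRAM regions. xe_vm_prefetch() below indexes it directly.
 */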
1922static const u32 region_to_mem_type[] = {
1923 XE_PL_TT,
1924 XE_PL_VRAM0,
1925 XE_PL_VRAM1,
1926};
1927
1928static int xe_vm_prefetch(struct xe_vm *vm, struct xe_vma *vma,
9b9529ce 1929 struct xe_exec_queue *q, u32 region,
dd08ebf6 1930 struct xe_sync_entry *syncs, u32 num_syncs,
f3e9b1f4 1931 bool first_op, bool last_op)
dd08ebf6 1932{
e669f10c 1933 struct xe_exec_queue *wait_exec_queue = to_wait_exec_queue(vm, q);
dd08ebf6
MB
1934 int err;
1935
c73acc1e 1936 	xe_assert(vm->xe, region < ARRAY_SIZE(region_to_mem_type));
dd08ebf6 1937
37430402 1938 if (!xe_vma_has_no_bo(vma)) {
21ed3327 1939 err = xe_bo_migrate(xe_vma_bo(vma), region_to_mem_type[region]);
dd08ebf6
MB
1940 if (err)
1941 return err;
1942 }
1943
876611c2 1944 if (vma->tile_mask != (vma->tile_present & ~vma->usm.tile_invalidated)) {
9b9529ce 1945 return xe_vm_bind(vm, vma, q, xe_vma_bo(vma), syncs, num_syncs,
f3e9b1f4 1946 true, first_op, last_op);
dd08ebf6
MB
1947 } else {
1948 int i;
1949
1950 /* Nothing to do, signal fences now */
b06d47be 1951 if (last_op) {
e669f10c
MB
1952 for (i = 0; i < num_syncs; i++) {
1953 struct dma_fence *fence =
1954 xe_exec_queue_last_fence_get(wait_exec_queue, vm);
1955
1956 xe_sync_entry_signal(&syncs[i], NULL, fence);
1957 }
b06d47be 1958 }
dd08ebf6 1959
f3e9b1f4 1960 return 0;
dd08ebf6 1961 }
dd08ebf6
MB
1962}
1963
b06d47be
MB
1964static void prep_vma_destroy(struct xe_vm *vm, struct xe_vma *vma,
1965 bool post_commit)
dd08ebf6 1966{
b06d47be
MB
1967 down_read(&vm->userptr.notifier_lock);
1968 vma->gpuva.flags |= XE_VMA_DESTROYED;
1969 up_read(&vm->userptr.notifier_lock);
1970 if (post_commit)
1971 xe_vm_remove_vma(vm, vma);
dd08ebf6
MB
1972}
1973
b06d47be
MB
1974#undef ULL
1975#define ULL unsigned long long
1976
1977#if IS_ENABLED(CONFIG_DRM_XE_DEBUG_VM)
1978static void print_op(struct xe_device *xe, struct drm_gpuva_op *op)
dd08ebf6 1979{
b06d47be 1980 struct xe_vma *vma;
dd08ebf6 1981
b06d47be
MB
1982 switch (op->op) {
1983 case DRM_GPUVA_OP_MAP:
1984 vm_dbg(&xe->drm, "MAP: addr=0x%016llx, range=0x%016llx",
1985 (ULL)op->map.va.addr, (ULL)op->map.va.range);
1986 break;
1987 case DRM_GPUVA_OP_REMAP:
1988 vma = gpuva_to_vma(op->remap.unmap->va);
1989 vm_dbg(&xe->drm, "REMAP:UNMAP: addr=0x%016llx, range=0x%016llx, keep=%d",
1990 (ULL)xe_vma_start(vma), (ULL)xe_vma_size(vma),
5f01a35b 1991 op->remap.unmap->keep ? 1 : 0);
b06d47be
MB
1992 if (op->remap.prev)
1993 vm_dbg(&xe->drm,
1994 "REMAP:PREV: addr=0x%016llx, range=0x%016llx",
1995 (ULL)op->remap.prev->va.addr,
1996 (ULL)op->remap.prev->va.range);
1997 if (op->remap.next)
1998 vm_dbg(&xe->drm,
1999 "REMAP:NEXT: addr=0x%016llx, range=0x%016llx",
2000 (ULL)op->remap.next->va.addr,
2001 (ULL)op->remap.next->va.range);
2002 break;
2003 case DRM_GPUVA_OP_UNMAP:
2004 vma = gpuva_to_vma(op->unmap.va);
2005 vm_dbg(&xe->drm, "UNMAP: addr=0x%016llx, range=0x%016llx, keep=%d",
2006 (ULL)xe_vma_start(vma), (ULL)xe_vma_size(vma),
2007 op->unmap.keep ? 1 : 0);
2008 break;
b1f8f4b5
BW
2009 case DRM_GPUVA_OP_PREFETCH:
2010 vma = gpuva_to_vma(op->prefetch.va);
2011 vm_dbg(&xe->drm, "PREFETCH: addr=0x%016llx, range=0x%016llx",
2012 (ULL)xe_vma_start(vma), (ULL)xe_vma_size(vma));
2013 break;
b06d47be 2014 default:
5c0553cd 2015 drm_warn(&xe->drm, "NOT POSSIBLE");
b06d47be
MB
2016 }
2017}
2018#else
2019static void print_op(struct xe_device *xe, struct drm_gpuva_op *op)
dd08ebf6 2020{
dd08ebf6 2021}
b06d47be 2022#endif
dd08ebf6 2023
b06d47be
MB
2024/*
 2025 * Create the operations list from the IOCTL arguments and set up the operation
 2026 * fields so the parse and commit steps are decoupled from the IOCTL arguments. This step can fail.
2027 */
2028static struct drm_gpuva_ops *
2029vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_bo *bo,
2030 u64 bo_offset_or_userptr, u64 addr, u64 range,
cad4a0d6 2031 u32 operation, u32 flags,
e1fbc4f1 2032 u32 prefetch_region, u16 pat_index)
dd08ebf6 2033{
b06d47be 2034 struct drm_gem_object *obj = bo ? &bo->ttm.base : NULL;
b06d47be
MB
2035 struct drm_gpuva_ops *ops;
2036 struct drm_gpuva_op *__op;
2037 struct xe_vma_op *op;
2038 struct drm_gpuvm_bo *vm_bo;
2039 int err;
dd08ebf6 2040
b06d47be 2041 lockdep_assert_held_write(&vm->lock);
dd08ebf6 2042
b06d47be
MB
2043 vm_dbg(&vm->xe->drm,
2044 "op=%d, addr=0x%016llx, range=0x%016llx, bo_offset_or_userptr=0x%016llx",
78ddc872 2045 operation, (ULL)addr, (ULL)range,
b06d47be 2046 (ULL)bo_offset_or_userptr);
dd08ebf6 2047
78ddc872 2048 switch (operation) {
d5dc73db
FD
2049 case DRM_XE_VM_BIND_OP_MAP:
2050 case DRM_XE_VM_BIND_OP_MAP_USERPTR:
b06d47be
MB
2051 ops = drm_gpuvm_sm_map_ops_create(&vm->gpuvm, addr, range,
2052 obj, bo_offset_or_userptr);
b06d47be 2053 break;
d5dc73db 2054 case DRM_XE_VM_BIND_OP_UNMAP:
b06d47be 2055 ops = drm_gpuvm_sm_unmap_ops_create(&vm->gpuvm, addr, range);
b06d47be 2056 break;
d5dc73db 2057 case DRM_XE_VM_BIND_OP_PREFETCH:
b06d47be 2058 ops = drm_gpuvm_prefetch_ops_create(&vm->gpuvm, addr, range);
b06d47be 2059 break;
d5dc73db 2060 case DRM_XE_VM_BIND_OP_UNMAP_ALL:
c73acc1e 2061 xe_assert(vm->xe, bo);
dd08ebf6 2062
08a4f00e 2063 err = xe_bo_lock(bo, true);
b06d47be
MB
2064 if (err)
2065 return ERR_PTR(err);
dd08ebf6 2066
9d0c1c56
TH
2067 vm_bo = drm_gpuvm_bo_obtain(&vm->gpuvm, obj);
2068 if (IS_ERR(vm_bo)) {
2069 xe_bo_unlock(bo);
2070 return ERR_CAST(vm_bo);
2071 }
b06d47be
MB
2072
2073 ops = drm_gpuvm_bo_unmap_ops_create(vm_bo);
2074 drm_gpuvm_bo_put(vm_bo);
08a4f00e 2075 xe_bo_unlock(bo);
b06d47be
MB
2076 break;
2077 default:
5c0553cd 2078 drm_warn(&vm->xe->drm, "NOT POSSIBLE");
b06d47be
MB
2079 ops = ERR_PTR(-EINVAL);
2080 }
40709aa7
MB
2081 if (IS_ERR(ops))
2082 return ops;
dd08ebf6 2083
b06d47be
MB
2084#ifdef TEST_VM_ASYNC_OPS_ERROR
2085 if (operation & FORCE_ASYNC_OP_ERROR) {
2086 op = list_first_entry_or_null(&ops->list, struct xe_vma_op,
2087 base.entry);
2088 if (op)
2089 op->inject_error = true;
dd08ebf6 2090 }
b06d47be
MB
2091#endif
2092
40709aa7
MB
2093 drm_gpuva_for_each_op(__op, ops) {
2094 struct xe_vma_op *op = gpuva_op_to_vma_op(__op);
2095
40709aa7
MB
2096 if (__op->op == DRM_GPUVA_OP_MAP) {
2097 op->map.immediate =
2098 flags & DRM_XE_VM_BIND_FLAG_IMMEDIATE;
2099 op->map.read_only =
2100 flags & DRM_XE_VM_BIND_FLAG_READONLY;
2101 op->map.is_null = flags & DRM_XE_VM_BIND_FLAG_NULL;
e1fbc4f1 2102 op->map.pat_index = pat_index;
40709aa7
MB
2103 } else if (__op->op == DRM_GPUVA_OP_PREFETCH) {
2104 op->prefetch.region = prefetch_region;
2105 }
2106
2107 print_op(vm->xe, __op);
2108 }
b06d47be
MB
2109
2110 return ops;
dd08ebf6
MB
2111}
2112
b06d47be 2113static struct xe_vma *new_vma(struct xe_vm *vm, struct drm_gpuva_op_map *op,
3b97e3b2 2114 u16 pat_index, unsigned int flags)
dd08ebf6 2115{
b06d47be 2116 struct xe_bo *bo = op->gem.obj ? gem_to_xe_bo(op->gem.obj) : NULL;
24f947d5 2117 struct drm_exec exec;
b06d47be 2118 struct xe_vma *vma;
b06d47be 2119 int err;
dd08ebf6 2120
b06d47be 2121 lockdep_assert_held_write(&vm->lock);
dd08ebf6 2122
b06d47be 2123 if (bo) {
24f947d5
TH
2124 drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT);
2125 drm_exec_until_all_locked(&exec) {
2126 err = 0;
2127 if (!bo->vm) {
2128 err = drm_exec_lock_obj(&exec, xe_vm_obj(vm));
2129 drm_exec_retry_on_contention(&exec);
2130 }
2131 if (!err) {
2132 err = drm_exec_lock_obj(&exec, &bo->ttm.base);
2133 drm_exec_retry_on_contention(&exec);
2134 }
2135 if (err) {
2136 drm_exec_fini(&exec);
2137 return ERR_PTR(err);
2138 }
2139 }
dd08ebf6 2140 }
b06d47be
MB
2141 vma = xe_vma_create(vm, bo, op->gem.offset,
2142 op->va.addr, op->va.addr +
3b97e3b2 2143 op->va.range - 1, pat_index, flags);
b06d47be 2144 if (bo)
24f947d5 2145 drm_exec_fini(&exec);
dd08ebf6 2146
b06d47be
MB
2147 if (xe_vma_is_userptr(vma)) {
2148 err = xe_vma_userptr_pin_pages(vma);
2149 if (err) {
2150 prep_vma_destroy(vm, vma, false);
2151 xe_vma_destroy_unlocked(vma);
2152 return ERR_PTR(err);
dd08ebf6 2153 }
b06d47be 2154 } else if (!xe_vma_has_no_bo(vma) && !bo->vm) {
b06d47be
MB
2155 err = add_preempt_fences(vm, bo);
2156 if (err) {
2157 prep_vma_destroy(vm, vma, false);
2158 xe_vma_destroy_unlocked(vma);
2159 return ERR_PTR(err);
2160 }
2161 }
2162
2163 return vma;
2164}
2165
8f33b4f0
MB
2166static u64 xe_vma_max_pte_size(struct xe_vma *vma)
2167{
2168 if (vma->gpuva.flags & XE_VMA_PTE_1G)
2169 return SZ_1G;
2170 else if (vma->gpuva.flags & XE_VMA_PTE_2M)
2171 return SZ_2M;
2172
2173 return SZ_4K;
2174}
2175
c47794bd
MB
2176static u64 xe_vma_set_pte_size(struct xe_vma *vma, u64 size)
2177{
2178 switch (size) {
2179 case SZ_1G:
2180 vma->gpuva.flags |= XE_VMA_PTE_1G;
2181 break;
2182 case SZ_2M:
2183 vma->gpuva.flags |= XE_VMA_PTE_2M;
2184 break;
2185 }
2186
2187 return SZ_4K;
2188}
2189
617eebb9
MB
2190static int xe_vma_op_commit(struct xe_vm *vm, struct xe_vma_op *op)
2191{
2192 int err = 0;
2193
2194 lockdep_assert_held_write(&vm->lock);
2195
2196 switch (op->base.op) {
2197 case DRM_GPUVA_OP_MAP:
2198 err |= xe_vm_insert_vma(vm, op->map.vma);
2199 if (!err)
2200 op->flags |= XE_VMA_OP_COMMITTED;
2201 break;
2202 case DRM_GPUVA_OP_REMAP:
81d11b9d
MB
2203 {
2204 u8 tile_present =
2205 gpuva_to_vma(op->base.remap.unmap->va)->tile_present;
2206
617eebb9
MB
2207 prep_vma_destroy(vm, gpuva_to_vma(op->base.remap.unmap->va),
2208 true);
2209 op->flags |= XE_VMA_OP_COMMITTED;
2210
2211 if (op->remap.prev) {
2212 err |= xe_vm_insert_vma(vm, op->remap.prev);
2213 if (!err)
2214 op->flags |= XE_VMA_OP_PREV_COMMITTED;
81d11b9d
MB
2215 if (!err && op->remap.skip_prev) {
2216 op->remap.prev->tile_present =
2217 tile_present;
617eebb9 2218 op->remap.prev = NULL;
81d11b9d 2219 }
617eebb9
MB
2220 }
2221 if (op->remap.next) {
2222 err |= xe_vm_insert_vma(vm, op->remap.next);
2223 if (!err)
2224 op->flags |= XE_VMA_OP_NEXT_COMMITTED;
81d11b9d
MB
2225 if (!err && op->remap.skip_next) {
2226 op->remap.next->tile_present =
2227 tile_present;
617eebb9 2228 op->remap.next = NULL;
81d11b9d 2229 }
617eebb9
MB
2230 }
2231
 2232 		/* Adjust for partial unbind after removing VMA from VM */
2233 if (!err) {
2234 op->base.remap.unmap->va->va.addr = op->remap.start;
2235 op->base.remap.unmap->va->va.range = op->remap.range;
2236 }
2237 break;
81d11b9d 2238 }
617eebb9
MB
2239 case DRM_GPUVA_OP_UNMAP:
2240 prep_vma_destroy(vm, gpuva_to_vma(op->base.unmap.va), true);
2241 op->flags |= XE_VMA_OP_COMMITTED;
2242 break;
2243 case DRM_GPUVA_OP_PREFETCH:
2244 op->flags |= XE_VMA_OP_COMMITTED;
2245 break;
2246 default:
5c0553cd 2247 drm_warn(&vm->xe->drm, "NOT POSSIBLE");
617eebb9
MB
2248 }
2249
2250 return err;
2251}
2252
2253
9b9529ce 2254static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct xe_exec_queue *q,
617eebb9 2255 struct drm_gpuva_ops *ops,
b06d47be 2256 struct xe_sync_entry *syncs, u32 num_syncs,
d3d76739 2257 struct list_head *ops_list, bool last)
b06d47be
MB
2258{
2259 struct xe_vma_op *last_op = NULL;
617eebb9
MB
2260 struct drm_gpuva_op *__op;
2261 int err = 0;
b06d47be
MB
2262
2263 lockdep_assert_held_write(&vm->lock);
b06d47be 2264
617eebb9
MB
2265 drm_gpuva_for_each_op(__op, ops) {
2266 struct xe_vma_op *op = gpuva_op_to_vma_op(__op);
3b97e3b2 2267 struct xe_vma *vma;
617eebb9 2268 bool first = list_empty(ops_list);
3b97e3b2 2269 unsigned int flags = 0;
dd08ebf6 2270
617eebb9
MB
2271 INIT_LIST_HEAD(&op->link);
2272 list_add_tail(&op->link, ops_list);
dd08ebf6 2273
617eebb9
MB
2274 if (first) {
2275 op->flags |= XE_VMA_OP_FIRST;
2276 op->num_syncs = num_syncs;
2277 op->syncs = syncs;
2278 }
dd08ebf6 2279
617eebb9
MB
2280 op->q = q;
2281
2282 switch (op->base.op) {
2283 case DRM_GPUVA_OP_MAP:
2284 {
3b97e3b2
MB
2285 flags |= op->map.read_only ?
2286 VMA_CREATE_FLAG_READ_ONLY : 0;
2287 flags |= op->map.is_null ?
2288 VMA_CREATE_FLAG_IS_NULL : 0;
dd08ebf6 2289
3b97e3b2
MB
2290 vma = new_vma(vm, &op->base.map, op->map.pat_index,
2291 flags);
f3e9b1f4
MB
2292 if (IS_ERR(vma))
2293 return PTR_ERR(vma);
dd08ebf6 2294
617eebb9
MB
2295 op->map.vma = vma;
2296 break;
2297 }
2298 case DRM_GPUVA_OP_REMAP:
2299 {
2300 struct xe_vma *old =
2301 gpuva_to_vma(op->base.remap.unmap->va);
dd08ebf6 2302
617eebb9
MB
2303 op->remap.start = xe_vma_start(old);
2304 op->remap.range = xe_vma_size(old);
dd08ebf6 2305
617eebb9 2306 if (op->base.remap.prev) {
3b97e3b2
MB
2307 flags |= op->base.remap.unmap->va->flags &
2308 XE_VMA_READ_ONLY ?
2309 VMA_CREATE_FLAG_READ_ONLY : 0;
2310 flags |= op->base.remap.unmap->va->flags &
2311 DRM_GPUVA_SPARSE ?
2312 VMA_CREATE_FLAG_IS_NULL : 0;
2313
2314 vma = new_vma(vm, op->base.remap.prev,
2315 old->pat_index, flags);
f3e9b1f4
MB
2316 if (IS_ERR(vma))
2317 return PTR_ERR(vma);
dd08ebf6 2318
617eebb9
MB
2319 op->remap.prev = vma;
2320
2321 /*
2322 * Userptr creates a new SG mapping so
2323 * we must also rebind.
2324 */
2325 op->remap.skip_prev = !xe_vma_is_userptr(old) &&
2326 IS_ALIGNED(xe_vma_end(vma),
2327 xe_vma_max_pte_size(old));
2328 if (op->remap.skip_prev) {
2329 xe_vma_set_pte_size(vma, xe_vma_max_pte_size(old));
2330 op->remap.range -=
2331 xe_vma_end(vma) -
2332 xe_vma_start(old);
2333 op->remap.start = xe_vma_end(vma);
2334 }
b06d47be 2335 }
617eebb9
MB
2336
2337 if (op->base.remap.next) {
3b97e3b2
MB
2338 flags |= op->base.remap.unmap->va->flags &
2339 XE_VMA_READ_ONLY ?
2340 VMA_CREATE_FLAG_READ_ONLY : 0;
2341 flags |= op->base.remap.unmap->va->flags &
2342 DRM_GPUVA_SPARSE ?
2343 VMA_CREATE_FLAG_IS_NULL : 0;
2344
2345 vma = new_vma(vm, op->base.remap.next,
2346 old->pat_index, flags);
f3e9b1f4
MB
2347 if (IS_ERR(vma))
2348 return PTR_ERR(vma);
dd08ebf6 2349
617eebb9
MB
2350 op->remap.next = vma;
2351
2352 /*
2353 * Userptr creates a new SG mapping so
2354 * we must also rebind.
2355 */
2356 op->remap.skip_next = !xe_vma_is_userptr(old) &&
2357 IS_ALIGNED(xe_vma_start(vma),
2358 xe_vma_max_pte_size(old));
2359 if (op->remap.skip_next) {
2360 xe_vma_set_pte_size(vma, xe_vma_max_pte_size(old));
2361 op->remap.range -=
2362 xe_vma_end(old) -
2363 xe_vma_start(vma);
8f33b4f0 2364 }
b06d47be 2365 }
617eebb9
MB
2366 break;
2367 }
2368 case DRM_GPUVA_OP_UNMAP:
2369 case DRM_GPUVA_OP_PREFETCH:
2370 /* Nothing to do */
2371 break;
2372 default:
5c0553cd 2373 drm_warn(&vm->xe->drm, "NOT POSSIBLE");
dd08ebf6 2374 }
dd08ebf6 2375
617eebb9
MB
2376 last_op = op;
2377
2378 err = xe_vma_op_commit(vm, op);
2379 if (err)
f3e9b1f4 2380 return err;
dd08ebf6 2381 }
dd08ebf6 2382
617eebb9
MB
2383 /* FIXME: Unhandled corner case */
2384 XE_WARN_ON(!last_op && last && !list_empty(ops_list));
dd08ebf6 2385
617eebb9 2386 if (!last_op)
f3e9b1f4
MB
2387 return 0;
2388
617eebb9
MB
2389 last_op->ops = ops;
2390 if (last) {
2391 last_op->flags |= XE_VMA_OP_LAST;
2392 last_op->num_syncs = num_syncs;
2393 last_op->syncs = syncs;
617eebb9 2394 }
dd08ebf6 2395
dd08ebf6 2396 return 0;
dd08ebf6
MB
2397}
2398
1f727182
TH
2399static int op_execute(struct drm_exec *exec, struct xe_vm *vm,
2400 struct xe_vma *vma, struct xe_vma_op *op)
dd08ebf6
MB
2401{
2402 int err;
2403
b06d47be 2404 lockdep_assert_held_write(&vm->lock);
dd08ebf6 2405
1f727182
TH
2406 err = xe_vm_prepare_vma(exec, vma, 1);
2407 if (err)
b06d47be 2408 return err;
dd08ebf6 2409
b06d47be
MB
2410 xe_vm_assert_held(vm);
2411 xe_bo_assert_held(xe_vma_bo(vma));
dd08ebf6 2412
b06d47be
MB
2413 switch (op->base.op) {
2414 case DRM_GPUVA_OP_MAP:
9b9529ce 2415 err = xe_vm_bind(vm, vma, op->q, xe_vma_bo(vma),
f3e9b1f4 2416 op->syncs, op->num_syncs,
b06d47be
MB
2417 op->map.immediate || !xe_vm_in_fault_mode(vm),
2418 op->flags & XE_VMA_OP_FIRST,
2419 op->flags & XE_VMA_OP_LAST);
2420 break;
2421 case DRM_GPUVA_OP_REMAP:
2422 {
2423 bool prev = !!op->remap.prev;
2424 bool next = !!op->remap.next;
2425
2426 if (!op->remap.unmap_done) {
f3e9b1f4 2427 if (prev || next)
b06d47be 2428 vma->gpuva.flags |= XE_VMA_FIRST_REBIND;
9b9529ce 2429 err = xe_vm_unbind(vm, vma, op->q, op->syncs,
b06d47be 2430 op->num_syncs,
b06d47be 2431 op->flags & XE_VMA_OP_FIRST,
f3e9b1f4
MB
2432 op->flags & XE_VMA_OP_LAST &&
2433 !prev && !next);
dd08ebf6 2434 if (err)
b06d47be
MB
2435 break;
2436 op->remap.unmap_done = true;
dd08ebf6 2437 }
dd08ebf6 2438
b06d47be
MB
2439 if (prev) {
2440 op->remap.prev->gpuva.flags |= XE_VMA_LAST_REBIND;
9b9529ce 2441 err = xe_vm_bind(vm, op->remap.prev, op->q,
b06d47be 2442 xe_vma_bo(op->remap.prev), op->syncs,
f3e9b1f4 2443 op->num_syncs, true, false,
b06d47be
MB
2444 op->flags & XE_VMA_OP_LAST && !next);
2445 op->remap.prev->gpuva.flags &= ~XE_VMA_LAST_REBIND;
dd08ebf6 2446 if (err)
b06d47be
MB
2447 break;
2448 op->remap.prev = NULL;
dd08ebf6 2449 }
dd08ebf6 2450
b06d47be
MB
2451 if (next) {
2452 op->remap.next->gpuva.flags |= XE_VMA_LAST_REBIND;
9b9529ce 2453 err = xe_vm_bind(vm, op->remap.next, op->q,
b06d47be
MB
2454 xe_vma_bo(op->remap.next),
2455 op->syncs, op->num_syncs,
f3e9b1f4 2456 true, false,
b06d47be
MB
2457 op->flags & XE_VMA_OP_LAST);
2458 op->remap.next->gpuva.flags &= ~XE_VMA_LAST_REBIND;
2459 if (err)
2460 break;
2461 op->remap.next = NULL;
dd08ebf6 2462 }
b06d47be
MB
2463
2464 break;
dd08ebf6 2465 }
b06d47be 2466 case DRM_GPUVA_OP_UNMAP:
9b9529ce 2467 err = xe_vm_unbind(vm, vma, op->q, op->syncs,
f3e9b1f4 2468 op->num_syncs, op->flags & XE_VMA_OP_FIRST,
b06d47be
MB
2469 op->flags & XE_VMA_OP_LAST);
2470 break;
2471 case DRM_GPUVA_OP_PREFETCH:
9b9529ce 2472 err = xe_vm_prefetch(vm, vma, op->q, op->prefetch.region,
f3e9b1f4 2473 op->syncs, op->num_syncs,
b06d47be
MB
2474 op->flags & XE_VMA_OP_FIRST,
2475 op->flags & XE_VMA_OP_LAST);
2476 break;
2477 default:
5c0553cd 2478 drm_warn(&vm->xe->drm, "NOT POSSIBLE");
dd08ebf6
MB
2479 }
2480
1f727182
TH
2481 if (err)
2482 trace_xe_vma_fail(vma);
2483
2484 return err;
2485}
2486
2487static int __xe_vma_op_execute(struct xe_vm *vm, struct xe_vma *vma,
2488 struct xe_vma_op *op)
2489{
2490 struct drm_exec exec;
2491 int err;
2492
2493retry_userptr:
2494 drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT);
2495 drm_exec_until_all_locked(&exec) {
2496 err = op_execute(&exec, vm, vma, op);
2497 drm_exec_retry_on_contention(&exec);
2498 if (err)
2499 break;
2500 }
2501 drm_exec_fini(&exec);
2502
b06d47be
MB
2503 if (err == -EAGAIN && xe_vma_is_userptr(vma)) {
2504 lockdep_assert_held_write(&vm->lock);
2505 err = xe_vma_userptr_pin_pages(vma);
2506 if (!err)
1f727182 2507 goto retry_userptr;
dd08ebf6 2508
b06d47be 2509 trace_xe_vma_fail(vma);
1f727182 2510 }
b06d47be
MB
2511
2512 return err;
dd08ebf6
MB
2513}
2514
b06d47be 2515static int xe_vma_op_execute(struct xe_vm *vm, struct xe_vma_op *op)
dd08ebf6 2516{
b06d47be 2517 int ret = 0;
dd08ebf6 2518
b06d47be 2519 lockdep_assert_held_write(&vm->lock);
dd08ebf6 2520
b06d47be
MB
2521#ifdef TEST_VM_ASYNC_OPS_ERROR
2522 if (op->inject_error) {
2523 op->inject_error = false;
2524 return -ENOMEM;
dd08ebf6 2525 }
b06d47be 2526#endif
dd08ebf6 2527
b06d47be
MB
2528 switch (op->base.op) {
2529 case DRM_GPUVA_OP_MAP:
2530 ret = __xe_vma_op_execute(vm, op->map.vma, op);
2531 break;
2532 case DRM_GPUVA_OP_REMAP:
2533 {
2534 struct xe_vma *vma;
2535
2536 if (!op->remap.unmap_done)
2537 vma = gpuva_to_vma(op->base.remap.unmap->va);
2538 else if (op->remap.prev)
2539 vma = op->remap.prev;
2540 else
2541 vma = op->remap.next;
2542
2543 ret = __xe_vma_op_execute(vm, vma, op);
2544 break;
2545 }
2546 case DRM_GPUVA_OP_UNMAP:
2547 ret = __xe_vma_op_execute(vm, gpuva_to_vma(op->base.unmap.va),
2548 op);
2549 break;
2550 case DRM_GPUVA_OP_PREFETCH:
2551 ret = __xe_vma_op_execute(vm,
2552 gpuva_to_vma(op->base.prefetch.va),
2553 op);
2554 break;
2555 default:
5c0553cd 2556 drm_warn(&vm->xe->drm, "NOT POSSIBLE");
dd08ebf6
MB
2557 }
2558
b06d47be
MB
2559 return ret;
2560}
dd08ebf6 2561
b06d47be
MB
2562static void xe_vma_op_cleanup(struct xe_vm *vm, struct xe_vma_op *op)
2563{
2564 bool last = op->flags & XE_VMA_OP_LAST;
dd08ebf6 2565
b06d47be
MB
2566 if (last) {
2567 while (op->num_syncs--)
2568 xe_sync_entry_cleanup(&op->syncs[op->num_syncs]);
2569 kfree(op->syncs);
9b9529ce
FD
2570 if (op->q)
2571 xe_exec_queue_put(op->q);
b06d47be 2572 }
f3e9b1f4 2573 if (!list_empty(&op->link))
b06d47be 2574 list_del(&op->link);
b06d47be
MB
2575 if (op->ops)
2576 drm_gpuva_ops_free(&vm->gpuvm, op->ops);
2577 if (last)
2578 xe_vm_put(vm);
dd08ebf6
MB
2579}
2580
b06d47be 2581static void xe_vma_op_unwind(struct xe_vm *vm, struct xe_vma_op *op,
5ef091fc
MB
2582 bool post_commit, bool prev_post_commit,
2583 bool next_post_commit)
dd08ebf6 2584{
b06d47be 2585 lockdep_assert_held_write(&vm->lock);
dd08ebf6 2586
b06d47be
MB
2587 switch (op->base.op) {
2588 case DRM_GPUVA_OP_MAP:
2589 if (op->map.vma) {
2590 prep_vma_destroy(vm, op->map.vma, post_commit);
2591 xe_vma_destroy_unlocked(op->map.vma);
2592 }
2593 break;
2594 case DRM_GPUVA_OP_UNMAP:
2595 {
2596 struct xe_vma *vma = gpuva_to_vma(op->base.unmap.va);
dd08ebf6 2597
617eebb9
MB
2598 if (vma) {
2599 down_read(&vm->userptr.notifier_lock);
2600 vma->gpuva.flags &= ~XE_VMA_DESTROYED;
2601 up_read(&vm->userptr.notifier_lock);
2602 if (post_commit)
2603 xe_vm_insert_vma(vm, vma);
2604 }
b06d47be
MB
2605 break;
2606 }
2607 case DRM_GPUVA_OP_REMAP:
2608 {
2609 struct xe_vma *vma = gpuva_to_vma(op->base.remap.unmap->va);
dd08ebf6 2610
b06d47be 2611 if (op->remap.prev) {
5ef091fc 2612 prep_vma_destroy(vm, op->remap.prev, prev_post_commit);
b06d47be
MB
2613 xe_vma_destroy_unlocked(op->remap.prev);
2614 }
2615 if (op->remap.next) {
5ef091fc 2616 prep_vma_destroy(vm, op->remap.next, next_post_commit);
b06d47be
MB
2617 xe_vma_destroy_unlocked(op->remap.next);
2618 }
617eebb9
MB
2619 if (vma) {
2620 down_read(&vm->userptr.notifier_lock);
2621 vma->gpuva.flags &= ~XE_VMA_DESTROYED;
2622 up_read(&vm->userptr.notifier_lock);
2623 if (post_commit)
2624 xe_vm_insert_vma(vm, vma);
2625 }
b06d47be
MB
2626 break;
2627 }
2628 case DRM_GPUVA_OP_PREFETCH:
2629 /* Nothing to do */
2630 break;
2631 default:
5c0553cd 2632 drm_warn(&vm->xe->drm, "NOT POSSIBLE");
dd08ebf6 2633 }
b06d47be 2634}
dd08ebf6 2635
b06d47be
MB
2636static void vm_bind_ioctl_ops_unwind(struct xe_vm *vm,
2637 struct drm_gpuva_ops **ops,
2638 int num_ops_list)
2639{
2640 int i;
2641
617eebb9 2642 	for (i = num_ops_list - 1; i >= 0; --i) {
b06d47be
MB
2643 struct drm_gpuva_ops *__ops = ops[i];
2644 struct drm_gpuva_op *__op;
2645
2646 if (!__ops)
2647 continue;
2648
617eebb9 2649 drm_gpuva_for_each_op_reverse(__op, __ops) {
b06d47be
MB
2650 struct xe_vma_op *op = gpuva_op_to_vma_op(__op);
2651
617eebb9
MB
2652 xe_vma_op_unwind(vm, op,
2653 op->flags & XE_VMA_OP_COMMITTED,
2654 op->flags & XE_VMA_OP_PREV_COMMITTED,
2655 op->flags & XE_VMA_OP_NEXT_COMMITTED);
b06d47be 2656 }
617eebb9
MB
2657
2658 drm_gpuva_ops_free(&vm->gpuvm, __ops);
b06d47be 2659 }
dd08ebf6
MB
2660}
2661
f3e9b1f4
MB
2662static int vm_bind_ioctl_ops_execute(struct xe_vm *vm,
2663 struct list_head *ops_list)
2664{
2665 struct xe_vma_op *op, *next;
2666 int err;
2667
2668 lockdep_assert_held_write(&vm->lock);
2669
2670 list_for_each_entry_safe(op, next, ops_list, link) {
2671 err = xe_vma_op_execute(vm, op);
2672 if (err) {
2673 drm_warn(&vm->xe->drm, "VM op(%d) failed with %d",
2674 op->base.op, err);
2675 /*
2676 * FIXME: Killing VM rather than proper error handling
2677 */
2678 xe_vm_kill(vm);
2679 return -ENOSPC;
2680 }
2681 xe_vma_op_cleanup(vm, op);
2682 }
2683
2684 return 0;
2685}
2686
dd08ebf6
MB
2687#ifdef TEST_VM_ASYNC_OPS_ERROR
2688#define SUPPORTED_FLAGS \
d3d76739
MB
2689 (FORCE_ASYNC_OP_ERROR | DRM_XE_VM_BIND_FLAG_READONLY | \
2690 DRM_XE_VM_BIND_FLAG_IMMEDIATE | DRM_XE_VM_BIND_FLAG_NULL | 0xffff)
dd08ebf6
MB
2691#else
2692#define SUPPORTED_FLAGS \
d3d76739 2693 (DRM_XE_VM_BIND_FLAG_READONLY | \
d5dc73db 2694 DRM_XE_VM_BIND_FLAG_IMMEDIATE | DRM_XE_VM_BIND_FLAG_NULL | \
f3e9b1f4 2695 0xffff)
dd08ebf6
MB
2696#endif
2697#define XE_64K_PAGE_MASK 0xffffull
d3d76739 2698#define ALL_DRM_XE_SYNCS_FLAGS (DRM_XE_SYNCS_FLAG_WAIT_FOR_OP)
dd08ebf6
MB
2699
2700#define MAX_BINDS 512 /* FIXME: Picking random upper limit */
2701
2702static int vm_bind_ioctl_check_args(struct xe_device *xe,
2703 struct drm_xe_vm_bind *args,
d3d76739 2704 struct drm_xe_vm_bind_op **bind_ops)
dd08ebf6
MB
2705{
2706 int err;
2707 int i;
2708
7a56bd0c
RV
2709 if (XE_IOCTL_DBG(xe, args->pad || args->pad2) ||
2710 XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
2711 return -EINVAL;
2712
b8c1ba83 2713 if (XE_IOCTL_DBG(xe, args->extensions) ||
b8c1ba83 2714 XE_IOCTL_DBG(xe, args->num_binds > MAX_BINDS))
dd08ebf6
MB
2715 return -EINVAL;
2716
2717 if (args->num_binds > 1) {
2718 u64 __user *bind_user =
2719 u64_to_user_ptr(args->vector_of_binds);
2720
2721 *bind_ops = kmalloc(sizeof(struct drm_xe_vm_bind_op) *
2722 args->num_binds, GFP_KERNEL);
2723 if (!*bind_ops)
2724 return -ENOMEM;
2725
2726 err = __copy_from_user(*bind_ops, bind_user,
2727 sizeof(struct drm_xe_vm_bind_op) *
2728 args->num_binds);
b8c1ba83 2729 if (XE_IOCTL_DBG(xe, err)) {
dd08ebf6
MB
2730 err = -EFAULT;
2731 goto free_bind_ops;
2732 }
2733 } else {
2734 *bind_ops = &args->bind;
2735 }
2736
2737 for (i = 0; i < args->num_binds; ++i) {
2738 u64 range = (*bind_ops)[i].range;
2739 u64 addr = (*bind_ops)[i].addr;
2740 u32 op = (*bind_ops)[i].op;
ea0640fc 2741 u32 flags = (*bind_ops)[i].flags;
dd08ebf6
MB
2742 u32 obj = (*bind_ops)[i].obj;
2743 u64 obj_offset = (*bind_ops)[i].obj_offset;
aaa115ff 2744 u32 prefetch_region = (*bind_ops)[i].prefetch_mem_region_instance;
d5dc73db 2745 bool is_null = flags & DRM_XE_VM_BIND_FLAG_NULL;
e1fbc4f1
MA
2746 u16 pat_index = (*bind_ops)[i].pat_index;
2747 u16 coh_mode;
2748
2749 if (XE_IOCTL_DBG(xe, pat_index >= xe->pat.n_entries)) {
2750 err = -EINVAL;
2751 goto free_bind_ops;
2752 }
2753
2754 pat_index = array_index_nospec(pat_index, xe->pat.n_entries);
2755 (*bind_ops)[i].pat_index = pat_index;
2756 coh_mode = xe_pat_index_get_coh_mode(xe, pat_index);
2757 if (XE_IOCTL_DBG(xe, !coh_mode)) { /* hw reserved */
2758 err = -EINVAL;
2759 goto free_bind_ops;
2760 }
2761
2762 if (XE_WARN_ON(coh_mode > XE_COH_AT_LEAST_1WAY)) {
2763 err = -EINVAL;
2764 goto free_bind_ops;
2765 }
1799c761 2766
d5dc73db 2767 if (XE_IOCTL_DBG(xe, op > DRM_XE_VM_BIND_OP_PREFETCH) ||
ea0640fc 2768 XE_IOCTL_DBG(xe, flags & ~SUPPORTED_FLAGS) ||
b8c1ba83
FD
2769 XE_IOCTL_DBG(xe, obj && is_null) ||
2770 XE_IOCTL_DBG(xe, obj_offset && is_null) ||
d5dc73db 2771 XE_IOCTL_DBG(xe, op != DRM_XE_VM_BIND_OP_MAP &&
37430402 2772 is_null) ||
b8c1ba83 2773 XE_IOCTL_DBG(xe, !obj &&
d5dc73db 2774 op == DRM_XE_VM_BIND_OP_MAP &&
37430402 2775 !is_null) ||
b8c1ba83 2776 XE_IOCTL_DBG(xe, !obj &&
d5dc73db 2777 op == DRM_XE_VM_BIND_OP_UNMAP_ALL) ||
b8c1ba83 2778 XE_IOCTL_DBG(xe, addr &&
d5dc73db 2779 op == DRM_XE_VM_BIND_OP_UNMAP_ALL) ||
b8c1ba83 2780 XE_IOCTL_DBG(xe, range &&
d5dc73db 2781 op == DRM_XE_VM_BIND_OP_UNMAP_ALL) ||
b8c1ba83 2782 XE_IOCTL_DBG(xe, obj &&
d5dc73db 2783 op == DRM_XE_VM_BIND_OP_MAP_USERPTR) ||
e1fbc4f1
MA
2784 XE_IOCTL_DBG(xe, coh_mode == XE_COH_NONE &&
2785 op == DRM_XE_VM_BIND_OP_MAP_USERPTR) ||
b8c1ba83 2786 XE_IOCTL_DBG(xe, obj &&
d5dc73db 2787 op == DRM_XE_VM_BIND_OP_PREFETCH) ||
aaa115ff 2788 XE_IOCTL_DBG(xe, prefetch_region &&
d5dc73db 2789 op != DRM_XE_VM_BIND_OP_PREFETCH) ||
aaa115ff 2790 XE_IOCTL_DBG(xe, !(BIT(prefetch_region) &
dd08ebf6 2791 xe->info.mem_region_mask)) ||
b8c1ba83 2792 XE_IOCTL_DBG(xe, obj &&
d5dc73db 2793 op == DRM_XE_VM_BIND_OP_UNMAP)) {
dd08ebf6
MB
2794 err = -EINVAL;
2795 goto free_bind_ops;
2796 }
2797
b8c1ba83
FD
2798 if (XE_IOCTL_DBG(xe, obj_offset & ~PAGE_MASK) ||
2799 XE_IOCTL_DBG(xe, addr & ~PAGE_MASK) ||
2800 XE_IOCTL_DBG(xe, range & ~PAGE_MASK) ||
f3e9b1f4 2801 XE_IOCTL_DBG(xe, !range &&
d5dc73db 2802 op != DRM_XE_VM_BIND_OP_UNMAP_ALL)) {
dd08ebf6
MB
2803 err = -EINVAL;
2804 goto free_bind_ops;
2805 }
2806 }
2807
2808 return 0;
2809
2810free_bind_ops:
2811 if (args->num_binds > 1)
2812 kfree(*bind_ops);
2813 return err;
2814}
2815
eb9702ad
MB
2816static int vm_bind_ioctl_signal_fences(struct xe_vm *vm,
2817 struct xe_exec_queue *q,
2818 struct xe_sync_entry *syncs,
2819 int num_syncs)
2820{
2821 struct dma_fence *fence;
2822 int i, err = 0;
2823
2824 fence = xe_sync_in_fence_get(syncs, num_syncs,
2825 to_wait_exec_queue(vm, q), vm);
2826 if (IS_ERR(fence))
2827 return PTR_ERR(fence);
2828
2829 for (i = 0; i < num_syncs; i++)
2830 xe_sync_entry_signal(&syncs[i], NULL, fence);
2831
2832 xe_exec_queue_last_fence_set(to_wait_exec_queue(vm, q), vm,
2833 fence);
eb9702ad
MB
2834 dma_fence_put(fence);
2835
2836 return err;
2837}
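/*
 * Used from the -ENODATA paths of xe_vm_bind_ioctl() below: a bind ioctl
 * that ends up with nothing to do is not surfaced as an error; the user's
 * syncs are signalled with the fence from xe_sync_in_fence_get() and the
 * -ENODATA is replaced by this function's return value.
 */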
2838
dd08ebf6
MB
2839int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
2840{
2841 struct xe_device *xe = to_xe_device(dev);
2842 struct xe_file *xef = to_xe_file(file);
2843 struct drm_xe_vm_bind *args = data;
2844 struct drm_xe_sync __user *syncs_user;
2845 struct xe_bo **bos = NULL;
b06d47be 2846 struct drm_gpuva_ops **ops = NULL;
dd08ebf6 2847 struct xe_vm *vm;
9b9529ce 2848 struct xe_exec_queue *q = NULL;
dd08ebf6
MB
2849 u32 num_syncs;
2850 struct xe_sync_entry *syncs = NULL;
2851 struct drm_xe_vm_bind_op *bind_ops;
b06d47be 2852 LIST_HEAD(ops_list);
dd08ebf6 2853 int err;
b06d47be 2854 int i;
dd08ebf6 2855
d3d76739 2856 err = vm_bind_ioctl_check_args(xe, args, &bind_ops);
dd08ebf6
MB
2857 if (err)
2858 return err;
2859
9b9529ce
FD
2860 if (args->exec_queue_id) {
2861 q = xe_exec_queue_lookup(xef, args->exec_queue_id);
2862 if (XE_IOCTL_DBG(xe, !q)) {
dd08ebf6 2863 err = -ENOENT;
9d858b69 2864 goto free_objs;
dd08ebf6 2865 }
9d858b69 2866
9b9529ce 2867 if (XE_IOCTL_DBG(xe, !(q->flags & EXEC_QUEUE_FLAG_VM))) {
dd08ebf6 2868 err = -EINVAL;
9b9529ce 2869 goto put_exec_queue;
dd08ebf6
MB
2870 }
2871 }
2872
9d858b69 2873 vm = xe_vm_lookup(xef, args->vm_id);
b8c1ba83 2874 if (XE_IOCTL_DBG(xe, !vm)) {
9d858b69 2875 err = -EINVAL;
9b9529ce 2876 goto put_exec_queue;
9d858b69
MB
2877 }
2878
2879 err = down_write_killable(&vm->lock);
2880 if (err)
2881 goto put_vm;
2882
b8c1ba83 2883 if (XE_IOCTL_DBG(xe, xe_vm_is_closed_or_banned(vm))) {
9d858b69
MB
2884 err = -ENOENT;
2885 goto release_vm_lock;
2886 }
2887
dd08ebf6
MB
2888 for (i = 0; i < args->num_binds; ++i) {
2889 u64 range = bind_ops[i].range;
2890 u64 addr = bind_ops[i].addr;
2891
b8c1ba83
FD
2892 if (XE_IOCTL_DBG(xe, range > vm->size) ||
2893 XE_IOCTL_DBG(xe, addr > vm->size - range)) {
dd08ebf6 2894 err = -EINVAL;
9d858b69 2895 goto release_vm_lock;
dd08ebf6 2896 }
dd08ebf6
MB
2897 }
2898
eb9702ad
MB
2899 if (args->num_binds) {
2900 bos = kcalloc(args->num_binds, sizeof(*bos), GFP_KERNEL);
2901 if (!bos) {
2902 err = -ENOMEM;
2903 goto release_vm_lock;
2904 }
dd08ebf6 2905
eb9702ad
MB
2906 ops = kcalloc(args->num_binds, sizeof(*ops), GFP_KERNEL);
2907 if (!ops) {
2908 err = -ENOMEM;
2909 goto release_vm_lock;
2910 }
dd08ebf6
MB
2911 }
2912
2913 for (i = 0; i < args->num_binds; ++i) {
2914 struct drm_gem_object *gem_obj;
2915 u64 range = bind_ops[i].range;
2916 u64 addr = bind_ops[i].addr;
2917 u32 obj = bind_ops[i].obj;
2918 u64 obj_offset = bind_ops[i].obj_offset;
e1fbc4f1
MA
2919 u16 pat_index = bind_ops[i].pat_index;
2920 u16 coh_mode;
dd08ebf6
MB
2921
2922 if (!obj)
2923 continue;
2924
2925 gem_obj = drm_gem_object_lookup(file, obj);
b8c1ba83 2926 if (XE_IOCTL_DBG(xe, !gem_obj)) {
dd08ebf6
MB
2927 err = -ENOENT;
2928 goto put_obj;
2929 }
2930 bos[i] = gem_to_xe_bo(gem_obj);
2931
b8c1ba83
FD
2932 if (XE_IOCTL_DBG(xe, range > bos[i]->size) ||
2933 XE_IOCTL_DBG(xe, obj_offset >
dd08ebf6
MB
2934 bos[i]->size - range)) {
2935 err = -EINVAL;
2936 goto put_obj;
2937 }
2938
2939 if (bos[i]->flags & XE_BO_INTERNAL_64K) {
b8c1ba83 2940 if (XE_IOCTL_DBG(xe, obj_offset &
dd08ebf6 2941 XE_64K_PAGE_MASK) ||
b8c1ba83
FD
2942 XE_IOCTL_DBG(xe, addr & XE_64K_PAGE_MASK) ||
2943 XE_IOCTL_DBG(xe, range & XE_64K_PAGE_MASK)) {
dd08ebf6
MB
2944 err = -EINVAL;
2945 goto put_obj;
2946 }
2947 }
e1fbc4f1
MA
2948
2949 coh_mode = xe_pat_index_get_coh_mode(xe, pat_index);
2950 if (bos[i]->cpu_caching) {
2951 if (XE_IOCTL_DBG(xe, coh_mode == XE_COH_NONE &&
2952 bos[i]->cpu_caching == DRM_XE_GEM_CPU_CACHING_WB)) {
2953 err = -EINVAL;
2954 goto put_obj;
2955 }
2956 } else if (XE_IOCTL_DBG(xe, coh_mode == XE_COH_NONE)) {
2957 /*
2958 * Imported dma-buf from a different device should
2959 * require 1way or 2way coherency since we don't know
 2960 			 * how it was mapped on the CPU. Just assume it is
 2961 			 * potentially cached on the CPU side.
2962 */
2963 err = -EINVAL;
2964 goto put_obj;
2965 }
dd08ebf6
MB
2966 }
2967
2968 if (args->num_syncs) {
2969 syncs = kcalloc(args->num_syncs, sizeof(*syncs), GFP_KERNEL);
2970 if (!syncs) {
2971 err = -ENOMEM;
2972 goto put_obj;
2973 }
2974 }
2975
2976 syncs_user = u64_to_user_ptr(args->syncs);
2977 for (num_syncs = 0; num_syncs < args->num_syncs; num_syncs++) {
2978 err = xe_sync_entry_parse(xe, xef, &syncs[num_syncs],
53bf60f6 2979 &syncs_user[num_syncs],
eb9702ad
MB
2980 (xe_vm_in_lr_mode(vm) ?
2981 SYNC_PARSE_FLAG_LR_MODE : 0) |
2982 (!args->num_binds ?
2983 SYNC_PARSE_FLAG_DISALLOW_USER_FENCE : 0));
dd08ebf6
MB
2984 if (err)
2985 goto free_syncs;
2986 }
2987
eb9702ad
MB
2988 if (!args->num_binds) {
2989 err = -ENODATA;
2990 goto free_syncs;
2991 }
2992
dd08ebf6
MB
2993 for (i = 0; i < args->num_binds; ++i) {
2994 u64 range = bind_ops[i].range;
2995 u64 addr = bind_ops[i].addr;
2996 u32 op = bind_ops[i].op;
ea0640fc 2997 u32 flags = bind_ops[i].flags;
dd08ebf6 2998 u64 obj_offset = bind_ops[i].obj_offset;
aaa115ff 2999 u32 prefetch_region = bind_ops[i].prefetch_mem_region_instance;
e1fbc4f1 3000 u16 pat_index = bind_ops[i].pat_index;
dd08ebf6 3001
b06d47be 3002 ops[i] = vm_bind_ioctl_ops_create(vm, bos[i], obj_offset,
ea0640fc 3003 addr, range, op, flags,
cad4a0d6 3004 prefetch_region, pat_index);
b06d47be
MB
3005 if (IS_ERR(ops[i])) {
3006 err = PTR_ERR(ops[i]);
3007 ops[i] = NULL;
3008 goto unwind_ops;
dd08ebf6 3009 }
617eebb9
MB
3010
3011 err = vm_bind_ioctl_ops_parse(vm, q, ops[i], syncs, num_syncs,
3012 &ops_list,
d3d76739 3013 i == args->num_binds - 1);
617eebb9
MB
3014 if (err)
3015 goto unwind_ops;
dd08ebf6
MB
3016 }
3017
617eebb9
MB
3018 /* Nothing to do */
3019 if (list_empty(&ops_list)) {
3020 err = -ENODATA;
b06d47be 3021 goto unwind_ops;
617eebb9 3022 }
dd08ebf6 3023
f3e9b1f4
MB
3024 xe_vm_get(vm);
3025 if (q)
3026 xe_exec_queue_get(q);
3027
3028 err = vm_bind_ioctl_ops_execute(vm, &ops_list);
3029
b06d47be 3030 up_write(&vm->lock);
dd08ebf6 3031
f3e9b1f4
MB
3032 if (q)
3033 xe_exec_queue_put(q);
3034 xe_vm_put(vm);
3035
3036 for (i = 0; bos && i < args->num_binds; ++i)
b06d47be 3037 xe_bo_put(bos[i]);
dd08ebf6 3038
b06d47be
MB
3039 kfree(bos);
3040 kfree(ops);
3041 if (args->num_binds > 1)
3042 kfree(bind_ops);
dd08ebf6 3043
b06d47be 3044 return err;
dd08ebf6 3045
b06d47be
MB
3046unwind_ops:
3047 vm_bind_ioctl_ops_unwind(vm, ops, args->num_binds);
dd08ebf6 3048free_syncs:
eb9702ad
MB
3049 if (err == -ENODATA)
3050 err = vm_bind_ioctl_signal_fences(vm, q, syncs, num_syncs);
b06d47be 3051 while (num_syncs--)
dd08ebf6 3052 xe_sync_entry_cleanup(&syncs[num_syncs]);
dd08ebf6
MB
3053
3054 kfree(syncs);
3055put_obj:
b06d47be 3056 for (i = 0; i < args->num_binds; ++i)
dd08ebf6 3057 xe_bo_put(bos[i]);
9d858b69
MB
3058release_vm_lock:
3059 up_write(&vm->lock);
3060put_vm:
3061 xe_vm_put(vm);
9b9529ce
FD
3062put_exec_queue:
3063 if (q)
3064 xe_exec_queue_put(q);
dd08ebf6
MB
3065free_objs:
3066 kfree(bos);
b06d47be 3067 kfree(ops);
dd08ebf6
MB
3068 if (args->num_binds > 1)
3069 kfree(bind_ops);
eb9702ad 3070 return err;
dd08ebf6
MB
3071}
3072
d00e9cc2
TH
3073/**
3074 * xe_vm_lock() - Lock the vm's dma_resv object
3075 * @vm: The struct xe_vm whose lock is to be locked
 3076 * @intr: Whether any wait on a contended lock should be interruptible
3077 *
3078 * Return: 0 on success, -EINTR if @intr is true and the wait for a
3079 * contended lock was interrupted. If @intr is false, the function
3080 * always returns 0.
dd08ebf6 3081 */
d00e9cc2 3082int xe_vm_lock(struct xe_vm *vm, bool intr)
dd08ebf6 3083{
d00e9cc2
TH
3084 if (intr)
3085 return dma_resv_lock_interruptible(xe_vm_resv(vm), NULL);
dd08ebf6 3086
d00e9cc2 3087 return dma_resv_lock(xe_vm_resv(vm), NULL);
dd08ebf6
MB
3088}
3089
d00e9cc2
TH
3090/**
3091 * xe_vm_unlock() - Unlock the vm's dma_resv object
3092 * @vm: The struct xe_vm whose lock is to be released.
3093 *
 3094 * Unlock the vm's dma_resv object, previously locked with xe_vm_lock().
3095 */
3096void xe_vm_unlock(struct xe_vm *vm)
dd08ebf6 3097{
b06d47be 3098 dma_resv_unlock(xe_vm_resv(vm));
dd08ebf6
MB
3099}
3100
3101/**
3102 * xe_vm_invalidate_vma - invalidate GPU mappings for VMA without a lock
3103 * @vma: VMA to invalidate
3104 *
 3105 * Walks the list of page table leaves, zeroing the entries owned by this
 3106 * VMA, invalidates the TLBs, and blocks until the TLB invalidation is
 3107 * complete.
3108 *
3109 * Returns 0 for success, negative error code otherwise.
3110 */
3111int xe_vm_invalidate_vma(struct xe_vma *vma)
3112{
21ed3327 3113 struct xe_device *xe = xe_vma_vm(vma)->xe;
876611c2
MR
3114 struct xe_tile *tile;
3115 u32 tile_needs_invalidate = 0;
a5edc7cd 3116 int seqno[XE_MAX_TILES_PER_DEVICE];
dd08ebf6
MB
3117 u8 id;
3118 int ret;
3119
c73acc1e
FD
3120 xe_assert(xe, xe_vm_in_fault_mode(xe_vma_vm(vma)));
3121 xe_assert(xe, !xe_vma_is_null(vma));
dd08ebf6
MB
3122 trace_xe_vma_usm_invalidate(vma);
3123
3124 /* Check that we don't race with page-table updates */
3125 if (IS_ENABLED(CONFIG_PROVE_LOCKING)) {
3126 if (xe_vma_is_userptr(vma)) {
3127 WARN_ON_ONCE(!mmu_interval_check_retry
3128 (&vma->userptr.notifier,
3129 vma->userptr.notifier_seq));
b06d47be 3130 WARN_ON_ONCE(!dma_resv_test_signaled(xe_vm_resv(xe_vma_vm(vma)),
dd08ebf6
MB
3131 DMA_RESV_USAGE_BOOKKEEP));
3132
3133 } else {
21ed3327 3134 xe_bo_assert_held(xe_vma_bo(vma));
dd08ebf6
MB
3135 }
3136 }
3137
876611c2
MR
3138 for_each_tile(tile, xe, id) {
3139 if (xe_pt_zap_ptes(tile, vma)) {
3140 tile_needs_invalidate |= BIT(id);
dd08ebf6 3141 xe_device_wmb(xe);
876611c2
MR
3142 /*
3143 * FIXME: We potentially need to invalidate multiple
3144 * GTs within the tile
3145 */
f6929e80 3146 seqno[id] = xe_gt_tlb_invalidation_vma(tile->primary_gt, NULL, vma);
dd08ebf6
MB
3147 if (seqno[id] < 0)
3148 return seqno[id];
3149 }
3150 }
3151
876611c2
MR
3152 for_each_tile(tile, xe, id) {
3153 if (tile_needs_invalidate & BIT(id)) {
f6929e80 3154 ret = xe_gt_tlb_invalidation_wait(tile->primary_gt, seqno[id]);
dd08ebf6
MB
3155 if (ret < 0)
3156 return ret;
3157 }
3158 }
3159
876611c2 3160 vma->usm.tile_invalidated = vma->tile_mask;
dd08ebf6
MB
3161
3162 return 0;
3163}
3164
dd08ebf6
MB
3165int xe_analyze_vm(struct drm_printer *p, struct xe_vm *vm, int gt_id)
3166{
b06d47be 3167 struct drm_gpuva *gpuva;
2a8477f7 3168 bool is_vram;
dd08ebf6
MB
3169 uint64_t addr;
3170
3171 if (!down_read_trylock(&vm->lock)) {
 3172 		drm_printf(p, " Failed to acquire VM lock to dump capture\n");
3173 return 0;
3174 }
3175 if (vm->pt_root[gt_id]) {
937b4be7
LDM
3176 addr = xe_bo_addr(vm->pt_root[gt_id]->bo, 0, XE_PAGE_SIZE);
3177 is_vram = xe_bo_is_vram(vm->pt_root[gt_id]->bo);
3178 drm_printf(p, " VM root: A:0x%llx %s\n", addr,
3179 is_vram ? "VRAM" : "SYS");
dd08ebf6
MB
3180 }
3181
b06d47be
MB
3182 drm_gpuvm_for_each_va(gpuva, &vm->gpuvm) {
3183 struct xe_vma *vma = gpuva_to_vma(gpuva);
dd08ebf6 3184 bool is_userptr = xe_vma_is_userptr(vma);
37430402 3185 bool is_null = xe_vma_is_null(vma);
dd08ebf6 3186
37430402
MB
3187 if (is_null) {
3188 addr = 0;
3189 } else if (is_userptr) {
dd08ebf6
MB
3190 struct xe_res_cursor cur;
3191
790bdc7c
MB
3192 if (vma->userptr.sg) {
3193 xe_res_first_sg(vma->userptr.sg, 0, XE_PAGE_SIZE,
3194 &cur);
3195 addr = xe_res_dma(&cur);
3196 } else {
3197 addr = 0;
3198 }
dd08ebf6 3199 } else {
937b4be7
LDM
3200 addr = __xe_bo_addr(xe_vma_bo(vma), 0, XE_PAGE_SIZE);
3201 is_vram = xe_bo_is_vram(xe_vma_bo(vma));
dd08ebf6
MB
3202 }
3203 drm_printf(p, " [%016llx-%016llx] S:0x%016llx A:%016llx %s\n",
21ed3327
MB
3204 xe_vma_start(vma), xe_vma_end(vma) - 1,
3205 xe_vma_size(vma),
37430402
MB
3206 addr, is_null ? "NULL" : is_userptr ? "USR" :
3207 is_vram ? "VRAM" : "SYS");
dd08ebf6
MB
3208 }
3209 up_read(&vm->lock);
3210
3211 return 0;
3212}