drivers/gpu/drm/xe/xe_vm.c
1 // SPDX-License-Identifier: MIT
2 /*
3  * Copyright © 2021 Intel Corporation
4  */
5
6 #include "xe_vm.h"
7
8 #include <linux/dma-fence-array.h>
9 #include <linux/nospec.h>
10
11 #include <drm/drm_exec.h>
12 #include <drm/drm_print.h>
13 #include <drm/ttm/ttm_execbuf_util.h>
14 #include <drm/ttm/ttm_tt.h>
15 #include <drm/xe_drm.h>
16 #include <linux/delay.h>
17 #include <linux/kthread.h>
18 #include <linux/mm.h>
19 #include <linux/swap.h>
20
21 #include "xe_assert.h"
22 #include "xe_bo.h"
23 #include "xe_device.h"
24 #include "xe_drm_client.h"
25 #include "xe_exec_queue.h"
26 #include "xe_gt.h"
27 #include "xe_gt_pagefault.h"
28 #include "xe_gt_tlb_invalidation.h"
29 #include "xe_migrate.h"
30 #include "xe_pat.h"
31 #include "xe_pm.h"
32 #include "xe_preempt_fence.h"
33 #include "xe_pt.h"
34 #include "xe_res_cursor.h"
35 #include "xe_sync.h"
36 #include "xe_trace.h"
37 #include "generated/xe_wa_oob.h"
38 #include "xe_wa.h"
39
40 #define TEST_VM_ASYNC_OPS_ERROR
41
42 static struct drm_gem_object *xe_vm_obj(struct xe_vm *vm)
43 {
44         return vm->gpuvm.r_obj;
45 }
46
47 /**
48  * xe_vma_userptr_check_repin() - Advisory check for repin needed
49  * @vma: The userptr vma
50  *
51  * Check if the userptr vma has been invalidated since last successful
52  * repin. The check is advisory only and the function can be called
53  * without the vm->userptr.notifier_lock held. There is no guarantee that the
54  * vma userptr will remain valid after a lockless check, so typically
55  * the call needs to be followed by a proper check under the notifier_lock.
56  *
57  * Return: 0 if userptr vma is valid, -EAGAIN otherwise; repin recommended.
58  */
59 int xe_vma_userptr_check_repin(struct xe_vma *vma)
60 {
61         return mmu_interval_check_retry(&vma->userptr.notifier,
62                                         vma->userptr.notifier_seq) ?
63                 -EAGAIN : 0;
64 }
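/*
 * Illustrative sketch of the intended pattern (not part of the driver),
 * assuming vm->lock is already held:
 *
 *	if (xe_vma_userptr_check_repin(vma) == -EAGAIN)
 *		err = xe_vma_userptr_pin_pages(vma);
 *
 * The lockless result is only a hint; before relying on the pages the
 * caller still needs the authoritative re-check under
 * vm->userptr.notifier_lock, e.g. via __xe_vm_userptr_needs_repin().
 */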
65
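/**
 * xe_vma_userptr_pin_pages() - Pin and map the pages backing a userptr vma
 * @vma: The userptr vma
 *
 * Grab the pages currently backing the userptr range with GUP, build and
 * DMA-map an sg table for them and record the notifier sequence number at
 * which the pages were valid. The page references are dropped again once
 * the sg table is set up; the MMU notifier is relied upon to detect later
 * invalidation (see xe_vma_userptr_check_repin()). Retries internally if
 * the range is invalidated while the pages are being pinned.
 *
 * Must be called with vm->lock held.
 *
 * Return: 0 on success, negative error code on failure.
 */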
66 int xe_vma_userptr_pin_pages(struct xe_vma *vma)
67 {
68         struct xe_vm *vm = xe_vma_vm(vma);
69         struct xe_device *xe = vm->xe;
70         const unsigned long num_pages = xe_vma_size(vma) >> PAGE_SHIFT;
71         struct page **pages;
72         bool in_kthread = !current->mm;
73         unsigned long notifier_seq;
74         int pinned, ret, i;
75         bool read_only = xe_vma_read_only(vma);
76
77         lockdep_assert_held(&vm->lock);
78         xe_assert(xe, xe_vma_is_userptr(vma));
79 retry:
80         if (vma->gpuva.flags & XE_VMA_DESTROYED)
81                 return 0;
82
83         notifier_seq = mmu_interval_read_begin(&vma->userptr.notifier);
84         if (notifier_seq == vma->userptr.notifier_seq)
85                 return 0;
86
87         pages = kvmalloc_array(num_pages, sizeof(*pages), GFP_KERNEL);
88         if (!pages)
89                 return -ENOMEM;
90
91         if (vma->userptr.sg) {
92                 dma_unmap_sgtable(xe->drm.dev,
93                                   vma->userptr.sg,
94                                   read_only ? DMA_TO_DEVICE :
95                                   DMA_BIDIRECTIONAL, 0);
96                 sg_free_table(vma->userptr.sg);
97                 vma->userptr.sg = NULL;
98         }
99
100         pinned = ret = 0;
101         if (in_kthread) {
102                 if (!mmget_not_zero(vma->userptr.notifier.mm)) {
103                         ret = -EFAULT;
104                         goto mm_closed;
105                 }
106                 kthread_use_mm(vma->userptr.notifier.mm);
107         }
108
109         while (pinned < num_pages) {
110                 ret = get_user_pages_fast(xe_vma_userptr(vma) +
111                                           pinned * PAGE_SIZE,
112                                           num_pages - pinned,
113                                           read_only ? 0 : FOLL_WRITE,
114                                           &pages[pinned]);
115                 if (ret < 0) {
116                         if (in_kthread)
117                                 ret = 0;
118                         break;
119                 }
120
121                 pinned += ret;
122                 ret = 0;
123         }
124
125         if (in_kthread) {
126                 kthread_unuse_mm(vma->userptr.notifier.mm);
127                 mmput(vma->userptr.notifier.mm);
128         }
129 mm_closed:
130         if (ret)
131                 goto out;
132
133         ret = sg_alloc_table_from_pages_segment(&vma->userptr.sgt, pages,
134                                                 pinned, 0,
135                                                 (u64)pinned << PAGE_SHIFT,
136                                                 xe_sg_segment_size(xe->drm.dev),
137                                                 GFP_KERNEL);
138         if (ret) {
139                 vma->userptr.sg = NULL;
140                 goto out;
141         }
142         vma->userptr.sg = &vma->userptr.sgt;
143
144         ret = dma_map_sgtable(xe->drm.dev, vma->userptr.sg,
145                               read_only ? DMA_TO_DEVICE :
146                               DMA_BIDIRECTIONAL,
147                               DMA_ATTR_SKIP_CPU_SYNC |
148                               DMA_ATTR_NO_KERNEL_MAPPING);
149         if (ret) {
150                 sg_free_table(vma->userptr.sg);
151                 vma->userptr.sg = NULL;
152                 goto out;
153         }
154
155         for (i = 0; i < pinned; ++i) {
156                 if (!read_only) {
157                         lock_page(pages[i]);
158                         set_page_dirty(pages[i]);
159                         unlock_page(pages[i]);
160                 }
161
162                 mark_page_accessed(pages[i]);
163         }
164
165 out:
166         release_pages(pages, pinned);
167         kvfree(pages);
168
169         if (!(ret < 0)) {
170                 vma->userptr.notifier_seq = notifier_seq;
171                 if (xe_vma_userptr_check_repin(vma) == -EAGAIN)
172                         goto retry;
173         }
174
175         return ret < 0 ? ret : 0;
176 }
177
178 static bool preempt_fences_waiting(struct xe_vm *vm)
179 {
180         struct xe_exec_queue *q;
181
182         lockdep_assert_held(&vm->lock);
183         xe_vm_assert_held(vm);
184
185         list_for_each_entry(q, &vm->preempt.exec_queues, compute.link) {
186                 if (!q->compute.pfence ||
187                     (q->compute.pfence && test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT,
188                                                    &q->compute.pfence->flags))) {
189                         return true;
190                 }
191         }
192
193         return false;
194 }
195
196 static void free_preempt_fences(struct list_head *list)
197 {
198         struct list_head *link, *next;
199
200         list_for_each_safe(link, next, list)
201                 xe_preempt_fence_free(to_preempt_fence_from_link(link));
202 }
203
204 static int alloc_preempt_fences(struct xe_vm *vm, struct list_head *list,
205                                 unsigned int *count)
206 {
207         lockdep_assert_held(&vm->lock);
208         xe_vm_assert_held(vm);
209
210         if (*count >= vm->preempt.num_exec_queues)
211                 return 0;
212
213         for (; *count < vm->preempt.num_exec_queues; ++(*count)) {
214                 struct xe_preempt_fence *pfence = xe_preempt_fence_alloc();
215
216                 if (IS_ERR(pfence))
217                         return PTR_ERR(pfence);
218
219                 list_move_tail(xe_preempt_fence_link(pfence), list);
220         }
221
222         return 0;
223 }
224
225 static int wait_for_existing_preempt_fences(struct xe_vm *vm)
226 {
227         struct xe_exec_queue *q;
228
229         xe_vm_assert_held(vm);
230
231         list_for_each_entry(q, &vm->preempt.exec_queues, compute.link) {
232                 if (q->compute.pfence) {
233                         long timeout = dma_fence_wait(q->compute.pfence, false);
234
235                         if (timeout < 0)
236                                 return -ETIME;
237                         dma_fence_put(q->compute.pfence);
238                         q->compute.pfence = NULL;
239                 }
240         }
241
242         return 0;
243 }
244
245 static bool xe_vm_is_idle(struct xe_vm *vm)
246 {
247         struct xe_exec_queue *q;
248
249         xe_vm_assert_held(vm);
250         list_for_each_entry(q, &vm->preempt.exec_queues, compute.link) {
251                 if (!xe_exec_queue_is_idle(q))
252                         return false;
253         }
254
255         return true;
256 }
257
258 static void arm_preempt_fences(struct xe_vm *vm, struct list_head *list)
259 {
260         struct list_head *link;
261         struct xe_exec_queue *q;
262
263         list_for_each_entry(q, &vm->preempt.exec_queues, compute.link) {
264                 struct dma_fence *fence;
265
266                 link = list->next;
267                 xe_assert(vm->xe, link != list);
268
269                 fence = xe_preempt_fence_arm(to_preempt_fence_from_link(link),
270                                              q, q->compute.context,
271                                              ++q->compute.seqno);
272                 dma_fence_put(q->compute.pfence);
273                 q->compute.pfence = fence;
274         }
275 }
276
277 static int add_preempt_fences(struct xe_vm *vm, struct xe_bo *bo)
278 {
279         struct xe_exec_queue *q;
280         int err;
281
282         if (!vm->preempt.num_exec_queues)
283                 return 0;
284
285         err = xe_bo_lock(bo, true);
286         if (err)
287                 return err;
288
289         err = dma_resv_reserve_fences(bo->ttm.base.resv, vm->preempt.num_exec_queues);
290         if (err)
291                 goto out_unlock;
292
293         list_for_each_entry(q, &vm->preempt.exec_queues, compute.link)
294                 if (q->compute.pfence) {
295                         dma_resv_add_fence(bo->ttm.base.resv,
296                                            q->compute.pfence,
297                                            DMA_RESV_USAGE_BOOKKEEP);
298                 }
299
300 out_unlock:
301         xe_bo_unlock(bo);
302         return err;
303 }
304
305 static void resume_and_reinstall_preempt_fences(struct xe_vm *vm,
306                                                 struct drm_exec *exec)
307 {
308         struct xe_exec_queue *q;
309
310         lockdep_assert_held(&vm->lock);
311         xe_vm_assert_held(vm);
312
313         list_for_each_entry(q, &vm->preempt.exec_queues, compute.link) {
314                 q->ops->resume(q);
315
316                 drm_gpuvm_resv_add_fence(&vm->gpuvm, exec, q->compute.pfence,
317                                          DMA_RESV_USAGE_BOOKKEEP, DMA_RESV_USAGE_BOOKKEEP);
318         }
319 }
320
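/**
 * xe_vm_add_compute_exec_queue() - Add a compute exec queue to the VM
 * @vm: The VM.
 * @q: The exec_queue.
 *
 * Create a preempt fence for @q, install it in the VM's reservation object
 * and add the queue to the VM's list of compute exec queues so that the
 * rebind worker manages it. The VM must be in preempt-fence mode.
 *
 * Return: 0 on success, negative error code on failure.
 */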
321 int xe_vm_add_compute_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q)
322 {
323         struct drm_gpuvm_exec vm_exec = {
324                 .vm = &vm->gpuvm,
325                 .flags = DRM_EXEC_INTERRUPTIBLE_WAIT,
326                 .num_fences = 1,
327         };
328         struct drm_exec *exec = &vm_exec.exec;
329         struct dma_fence *pfence;
330         int err;
331         bool wait;
332
333         xe_assert(vm->xe, xe_vm_in_preempt_fence_mode(vm));
334
335         down_write(&vm->lock);
336         err = drm_gpuvm_exec_lock(&vm_exec);
337         if (err)
338                 goto out_up_write;
339
340         pfence = xe_preempt_fence_create(q, q->compute.context,
341                                          ++q->compute.seqno);
342         if (!pfence) {
343                 err = -ENOMEM;
344                 goto out_fini;
345         }
346
347         list_add(&q->compute.link, &vm->preempt.exec_queues);
348         ++vm->preempt.num_exec_queues;
349         q->compute.pfence = pfence;
350
351         down_read(&vm->userptr.notifier_lock);
352
353         drm_gpuvm_resv_add_fence(&vm->gpuvm, exec, pfence,
354                                  DMA_RESV_USAGE_BOOKKEEP, DMA_RESV_USAGE_BOOKKEEP);
355
356         /*
357          * Check to see if a preemption on the VM or a userptr invalidation
358          * is in flight; if so, trigger this preempt fence to sync state with
359          * the other preempt fences on the VM.
360          */
361         wait = __xe_vm_userptr_needs_repin(vm) || preempt_fences_waiting(vm);
362         if (wait)
363                 dma_fence_enable_sw_signaling(pfence);
364
365         up_read(&vm->userptr.notifier_lock);
366
367 out_fini:
368         drm_exec_fini(exec);
369 out_up_write:
370         up_write(&vm->lock);
371
372         return err;
373 }
374
375 /**
376  * xe_vm_remove_compute_exec_queue() - Remove compute exec queue from VM
377  * @vm: The VM.
378  * @q: The exec_queue
379  */
380 void xe_vm_remove_compute_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q)
381 {
382         if (!xe_vm_in_preempt_fence_mode(vm))
383                 return;
384
385         down_write(&vm->lock);
386         list_del(&q->compute.link);
387         --vm->preempt.num_exec_queues;
388         if (q->compute.pfence) {
389                 dma_fence_enable_sw_signaling(q->compute.pfence);
390                 dma_fence_put(q->compute.pfence);
391                 q->compute.pfence = NULL;
392         }
393         up_write(&vm->lock);
394 }
395
396 /**
397  * __xe_vm_userptr_needs_repin() - Check whether the VM does have userptrs
398  * that need repinning.
399  * @vm: The VM.
400  *
401  * This function checks for whether the VM has userptrs that need repinning,
402  * and provides a release-type barrier on the userptr.notifier_lock after
403  * checking.
404  *
405  * Return: 0 if there are no userptrs needing repinning, -EAGAIN if there are.
406  */
407 int __xe_vm_userptr_needs_repin(struct xe_vm *vm)
408 {
409         lockdep_assert_held_read(&vm->userptr.notifier_lock);
410
411         return (list_empty(&vm->userptr.repin_list) &&
412                 list_empty(&vm->userptr.invalidated)) ? 0 : -EAGAIN;
413 }
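/*
 * Illustrative sketch of how the locked check is meant to be used (not a
 * verbatim caller, "retry" is a hypothetical label): make the
 * authoritative check under the notifier lock and only publish fences or
 * commit state while still holding it.
 *
 *	down_read(&vm->userptr.notifier_lock);
 *	if (__xe_vm_userptr_needs_repin(vm)) {
 *		up_read(&vm->userptr.notifier_lock);
 *		goto retry;
 *	}
 *	... install fences / commit page-table state ...
 *	up_read(&vm->userptr.notifier_lock);
 */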
414
415 #define XE_VM_REBIND_RETRY_TIMEOUT_MS 1000
416
417 static void xe_vm_kill(struct xe_vm *vm)
418 {
419         struct xe_exec_queue *q;
420
421         lockdep_assert_held(&vm->lock);
422
423         xe_vm_lock(vm, false);
424         vm->flags |= XE_VM_FLAG_BANNED;
425         trace_xe_vm_kill(vm);
426
427         list_for_each_entry(q, &vm->preempt.exec_queues, compute.link)
428                 q->ops->kill(q);
429         xe_vm_unlock(vm);
430
431         /* TODO: Inform user the VM is banned */
432 }
433
434 /**
435  * xe_vm_validate_should_retry() - Whether to retry after a validate error.
436  * @exec: The drm_exec object used for locking before validation.
437  * @err: The error returned from ttm_bo_validate().
438  * @end: A ktime_t cookie that should be set to 0 before first use and
439  * that should be reused on subsequent calls.
440  *
441  * With multiple active VMs, under memory pressure, it is possible that
442  * ttm_bo_validate() runs into -EDEADLK and in such a case returns -ENOMEM.
443  * Until ttm properly handles locking in such scenarios, the best thing the
444  * driver can do is retry with a timeout. Check if that is necessary, and
445  * if so unlock the drm_exec's objects while keeping the ticket to prepare
446  * for a rerun.
447  *
448  * Return: true if a retry after drm_exec_init() is recommended;
449  * false otherwise.
450  */
451 bool xe_vm_validate_should_retry(struct drm_exec *exec, int err, ktime_t *end)
452 {
453         ktime_t cur;
454
455         if (err != -ENOMEM)
456                 return false;
457
458         cur = ktime_get();
459         *end = *end ? : ktime_add_ms(cur, XE_VM_REBIND_RETRY_TIMEOUT_MS);
460         if (!ktime_before(cur, *end))
461                 return false;
462
463         msleep(20);
464         return true;
465 }
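/*
 * Illustrative retry loop (a sketch mirroring the rebind worker below,
 * not a verbatim caller; lock_and_validate_objects() stands in for
 * whatever combination of drm_exec_prepare_obj()/xe_bo_validate() the
 * caller needs):
 *
 *	ktime_t end = 0;
 *
 * retry:
 *	drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0);
 *	drm_exec_until_all_locked(&exec) {
 *		err = lock_and_validate_objects(&exec);
 *		drm_exec_retry_on_contention(&exec);
 *	}
 *	drm_exec_fini(&exec);
 *	if (err && xe_vm_validate_should_retry(&exec, err, &end))
 *		goto retry;
 */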
466
467 static int xe_gpuvm_validate(struct drm_gpuvm_bo *vm_bo, struct drm_exec *exec)
468 {
469         struct xe_vm *vm = gpuvm_to_vm(vm_bo->vm);
470         struct drm_gpuva *gpuva;
471         int ret;
472
473         lockdep_assert_held(&vm->lock);
474         drm_gpuvm_bo_for_each_va(gpuva, vm_bo)
475                 list_move_tail(&gpuva_to_vma(gpuva)->combined_links.rebind,
476                                &vm->rebind_list);
477
478         ret = xe_bo_validate(gem_to_xe_bo(vm_bo->obj), vm, false);
479         if (ret)
480                 return ret;
481
482         vm_bo->evicted = false;
483         return 0;
484 }
485
486 static int xe_preempt_work_begin(struct drm_exec *exec, struct xe_vm *vm,
487                                  bool *done)
488 {
489         int err;
490
491         /*
492          * 1 fence for each preempt fence plus a fence for each tile from a
493          * possible rebind
494          */
495         err = drm_gpuvm_prepare_vm(&vm->gpuvm, exec, vm->preempt.num_exec_queues +
496                                    vm->xe->info.tile_count);
497         if (err)
498                 return err;
499
500         if (xe_vm_is_idle(vm)) {
501                 vm->preempt.rebind_deactivated = true;
502                 *done = true;
503                 return 0;
504         }
505
506         if (!preempt_fences_waiting(vm)) {
507                 *done = true;
508                 return 0;
509         }
510
511         err = drm_gpuvm_prepare_objects(&vm->gpuvm, exec, vm->preempt.num_exec_queues);
512         if (err)
513                 return err;
514
515         err = wait_for_existing_preempt_fences(vm);
516         if (err)
517                 return err;
518
519         return drm_gpuvm_validate(&vm->gpuvm, exec);
520 }
521
522 static void preempt_rebind_work_func(struct work_struct *w)
523 {
524         struct xe_vm *vm = container_of(w, struct xe_vm, preempt.rebind_work);
525         struct drm_exec exec;
526         struct dma_fence *rebind_fence;
527         unsigned int fence_count = 0;
528         LIST_HEAD(preempt_fences);
529         ktime_t end = 0;
530         int err = 0;
531         long wait;
532         int __maybe_unused tries = 0;
533
534         xe_assert(vm->xe, xe_vm_in_preempt_fence_mode(vm));
535         trace_xe_vm_rebind_worker_enter(vm);
536
537         down_write(&vm->lock);
538
539         if (xe_vm_is_closed_or_banned(vm)) {
540                 up_write(&vm->lock);
541                 trace_xe_vm_rebind_worker_exit(vm);
542                 return;
543         }
544
545 retry:
546         if (xe_vm_userptr_check_repin(vm)) {
547                 err = xe_vm_userptr_pin(vm);
548                 if (err)
549                         goto out_unlock_outer;
550         }
551
552         drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0);
553
554         drm_exec_until_all_locked(&exec) {
555                 bool done = false;
556
557                 err = xe_preempt_work_begin(&exec, vm, &done);
558                 drm_exec_retry_on_contention(&exec);
559                 if (err || done) {
560                         drm_exec_fini(&exec);
561                         if (err && xe_vm_validate_should_retry(&exec, err, &end))
562                                 err = -EAGAIN;
563
564                         goto out_unlock_outer;
565                 }
566         }
567
568         err = alloc_preempt_fences(vm, &preempt_fences, &fence_count);
569         if (err)
570                 goto out_unlock;
571
572         rebind_fence = xe_vm_rebind(vm, true);
573         if (IS_ERR(rebind_fence)) {
574                 err = PTR_ERR(rebind_fence);
575                 goto out_unlock;
576         }
577
578         if (rebind_fence) {
579                 dma_fence_wait(rebind_fence, false);
580                 dma_fence_put(rebind_fence);
581         }
582
583         /* Wait on munmap style VM unbinds */
584         wait = dma_resv_wait_timeout(xe_vm_resv(vm),
585                                      DMA_RESV_USAGE_KERNEL,
586                                      false, MAX_SCHEDULE_TIMEOUT);
587         if (wait <= 0) {
588                 err = -ETIME;
589                 goto out_unlock;
590         }
591
592 #define retry_required(__tries, __vm) \
593         (IS_ENABLED(CONFIG_DRM_XE_USERPTR_INVAL_INJECT) ? \
594         (!(__tries)++ || __xe_vm_userptr_needs_repin(__vm)) : \
595         __xe_vm_userptr_needs_repin(__vm))
596
597         down_read(&vm->userptr.notifier_lock);
598         if (retry_required(tries, vm)) {
599                 up_read(&vm->userptr.notifier_lock);
600                 err = -EAGAIN;
601                 goto out_unlock;
602         }
603
604 #undef retry_required
605
606         spin_lock(&vm->xe->ttm.lru_lock);
607         ttm_lru_bulk_move_tail(&vm->lru_bulk_move);
608         spin_unlock(&vm->xe->ttm.lru_lock);
609
610         /* Point of no return. */
611         arm_preempt_fences(vm, &preempt_fences);
612         resume_and_reinstall_preempt_fences(vm, &exec);
613         up_read(&vm->userptr.notifier_lock);
614
615 out_unlock:
616         drm_exec_fini(&exec);
617 out_unlock_outer:
618         if (err == -EAGAIN) {
619                 trace_xe_vm_rebind_worker_retry(vm);
620                 goto retry;
621         }
622
623         if (err) {
624                 drm_warn(&vm->xe->drm, "VM worker error: %d\n", err);
625                 xe_vm_kill(vm);
626         }
627         up_write(&vm->lock);
628
629         free_preempt_fences(&preempt_fences);
630
631         trace_xe_vm_rebind_worker_exit(vm);
632 }
633
634 static bool vma_userptr_invalidate(struct mmu_interval_notifier *mni,
635                                    const struct mmu_notifier_range *range,
636                                    unsigned long cur_seq)
637 {
638         struct xe_vma *vma = container_of(mni, struct xe_vma, userptr.notifier);
639         struct xe_vm *vm = xe_vma_vm(vma);
640         struct dma_resv_iter cursor;
641         struct dma_fence *fence;
642         long err;
643
644         xe_assert(vm->xe, xe_vma_is_userptr(vma));
645         trace_xe_vma_userptr_invalidate(vma);
646
647         if (!mmu_notifier_range_blockable(range))
648                 return false;
649
650         down_write(&vm->userptr.notifier_lock);
651         mmu_interval_set_seq(mni, cur_seq);
652
653         /* No need to stop gpu access if the userptr is not yet bound. */
654         if (!vma->userptr.initial_bind) {
655                 up_write(&vm->userptr.notifier_lock);
656                 return true;
657         }
658
659         /*
660          * Tell exec and rebind worker they need to repin and rebind this
661          * userptr.
662          */
663         if (!xe_vm_in_fault_mode(vm) &&
664             !(vma->gpuva.flags & XE_VMA_DESTROYED) && vma->tile_present) {
665                 spin_lock(&vm->userptr.invalidated_lock);
666                 list_move_tail(&vma->userptr.invalidate_link,
667                                &vm->userptr.invalidated);
668                 spin_unlock(&vm->userptr.invalidated_lock);
669         }
670
671         up_write(&vm->userptr.notifier_lock);
672
673         /*
674          * Preempt fences turn into schedule disables, pipeline these.
675          * Note that even in fault mode, we need to wait for binds and
676          * unbinds to complete, and those are attached as BOOKKEEP fences
677          * to the vm.
678          */
679         dma_resv_iter_begin(&cursor, xe_vm_resv(vm),
680                             DMA_RESV_USAGE_BOOKKEEP);
681         dma_resv_for_each_fence_unlocked(&cursor, fence)
682                 dma_fence_enable_sw_signaling(fence);
683         dma_resv_iter_end(&cursor);
684
685         err = dma_resv_wait_timeout(xe_vm_resv(vm),
686                                     DMA_RESV_USAGE_BOOKKEEP,
687                                     false, MAX_SCHEDULE_TIMEOUT);
688         XE_WARN_ON(err <= 0);
689
690         if (xe_vm_in_fault_mode(vm)) {
691                 err = xe_vm_invalidate_vma(vma);
692                 XE_WARN_ON(err);
693         }
694
695         trace_xe_vma_userptr_invalidate_complete(vma);
696
697         return true;
698 }
699
700 static const struct mmu_interval_notifier_ops vma_userptr_notifier_ops = {
701         .invalidate = vma_userptr_invalidate,
702 };
703
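/**
 * xe_vm_userptr_pin() - Pin all invalidated userptrs in a VM
 * @vm: The VM.
 *
 * Collect all userptr vmas that have been invalidated, re-pin their pages
 * and move them to the VM's rebind list so that the next rebind picks
 * them up. Must be called with vm->lock held in write mode.
 *
 * Return: 0 on success, negative error code on failure.
 */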
704 int xe_vm_userptr_pin(struct xe_vm *vm)
705 {
706         struct xe_vma *vma, *next;
707         int err = 0;
708         LIST_HEAD(tmp_evict);
709
710         lockdep_assert_held_write(&vm->lock);
711
712         /* Collect invalidated userptrs */
713         spin_lock(&vm->userptr.invalidated_lock);
714         list_for_each_entry_safe(vma, next, &vm->userptr.invalidated,
715                                  userptr.invalidate_link) {
716                 list_del_init(&vma->userptr.invalidate_link);
717                 list_move_tail(&vma->combined_links.userptr,
718                                &vm->userptr.repin_list);
719         }
720         spin_unlock(&vm->userptr.invalidated_lock);
721
722         /* Pin and move to temporary list */
723         list_for_each_entry_safe(vma, next, &vm->userptr.repin_list,
724                                  combined_links.userptr) {
725                 err = xe_vma_userptr_pin_pages(vma);
726                 if (err < 0)
727                         return err;
728
729                 list_move_tail(&vma->combined_links.userptr, &vm->rebind_list);
730         }
731
732         return 0;
733 }
734
735 /**
736  * xe_vm_userptr_check_repin() - Check whether the VM might have userptrs
737  * that need repinning.
738  * @vm: The VM.
739  *
740  * This function does an advisory check for whether the VM has userptrs that
741  * need repinning.
742  *
743  * Return: 0 if there are no indications of userptrs needing repinning,
744  * -EAGAIN if there are.
745  */
746 int xe_vm_userptr_check_repin(struct xe_vm *vm)
747 {
748         return (list_empty_careful(&vm->userptr.repin_list) &&
749                 list_empty_careful(&vm->userptr.invalidated)) ? 0 : -EAGAIN;
750 }
751
752 static struct dma_fence *
753 xe_vm_bind_vma(struct xe_vma *vma, struct xe_exec_queue *q,
754                struct xe_sync_entry *syncs, u32 num_syncs,
755                bool first_op, bool last_op);
756
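/**
 * xe_vm_rebind() - Rebind all vmas on the VM's rebind list
 * @vm: The VM.
 * @rebind_worker: Whether the call is made from the preempt rebind worker.
 *
 * (Re)bind every vma currently on @vm's rebind list, emptying the list.
 * In long-running (LR) mode rebinds are only performed from the rebind
 * worker, so calls with @rebind_worker == false return NULL there.
 *
 * Return: the fence of the last rebind, NULL if there was nothing to
 * rebind, or an ERR_PTR() on failure.
 */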
757 struct dma_fence *xe_vm_rebind(struct xe_vm *vm, bool rebind_worker)
758 {
759         struct dma_fence *fence = NULL;
760         struct xe_vma *vma, *next;
761
762         lockdep_assert_held(&vm->lock);
763         if (xe_vm_in_lr_mode(vm) && !rebind_worker)
764                 return NULL;
765
766         xe_vm_assert_held(vm);
767         list_for_each_entry_safe(vma, next, &vm->rebind_list,
768                                  combined_links.rebind) {
769                 xe_assert(vm->xe, vma->tile_present);
770
771                 list_del_init(&vma->combined_links.rebind);
772                 dma_fence_put(fence);
773                 if (rebind_worker)
774                         trace_xe_vma_rebind_worker(vma);
775                 else
776                         trace_xe_vma_rebind_exec(vma);
777                 fence = xe_vm_bind_vma(vma, NULL, NULL, 0, false, false);
778                 if (IS_ERR(fence))
779                         return fence;
780         }
781
782         return fence;
783 }
784
785 #define VMA_CREATE_FLAG_READ_ONLY       BIT(0)
786 #define VMA_CREATE_FLAG_IS_NULL         BIT(1)
787
788 static struct xe_vma *xe_vma_create(struct xe_vm *vm,
789                                     struct xe_bo *bo,
790                                     u64 bo_offset_or_userptr,
791                                     u64 start, u64 end,
792                                     u16 pat_index, unsigned int flags)
793 {
794         struct xe_vma *vma;
795         struct xe_tile *tile;
796         u8 id;
797         bool read_only = (flags & VMA_CREATE_FLAG_READ_ONLY);
798         bool is_null = (flags & VMA_CREATE_FLAG_IS_NULL);
799
800         xe_assert(vm->xe, start < end);
801         xe_assert(vm->xe, end < vm->size);
802
803         if (!bo && !is_null)    /* userptr */
804                 vma = kzalloc(sizeof(*vma), GFP_KERNEL);
805         else
806                 vma = kzalloc(sizeof(*vma) - sizeof(struct xe_userptr),
807                               GFP_KERNEL);
808         if (!vma) {
809                 vma = ERR_PTR(-ENOMEM);
810                 return vma;
811         }
812
813         INIT_LIST_HEAD(&vma->combined_links.rebind);
814
815         INIT_LIST_HEAD(&vma->gpuva.gem.entry);
816         vma->gpuva.vm = &vm->gpuvm;
817         vma->gpuva.va.addr = start;
818         vma->gpuva.va.range = end - start + 1;
819         if (read_only)
820                 vma->gpuva.flags |= XE_VMA_READ_ONLY;
821         if (is_null)
822                 vma->gpuva.flags |= DRM_GPUVA_SPARSE;
823
824         for_each_tile(tile, vm->xe, id)
825                 vma->tile_mask |= 0x1 << id;
826
827         if (GRAPHICS_VER(vm->xe) >= 20 || vm->xe->info.platform == XE_PVC)
828                 vma->gpuva.flags |= XE_VMA_ATOMIC_PTE_BIT;
829
830         vma->pat_index = pat_index;
831
832         if (bo) {
833                 struct drm_gpuvm_bo *vm_bo;
834
835                 xe_bo_assert_held(bo);
836
837                 vm_bo = drm_gpuvm_bo_obtain(vma->gpuva.vm, &bo->ttm.base);
838                 if (IS_ERR(vm_bo)) {
839                         kfree(vma);
840                         return ERR_CAST(vm_bo);
841                 }
842
843                 drm_gpuvm_bo_extobj_add(vm_bo);
844                 drm_gem_object_get(&bo->ttm.base);
845                 vma->gpuva.gem.obj = &bo->ttm.base;
846                 vma->gpuva.gem.offset = bo_offset_or_userptr;
847                 drm_gpuva_link(&vma->gpuva, vm_bo);
848                 drm_gpuvm_bo_put(vm_bo);
849         } else /* userptr or null */ {
850                 if (!is_null) {
851                         u64 size = end - start + 1;
852                         int err;
853
854                         INIT_LIST_HEAD(&vma->userptr.invalidate_link);
855                         vma->gpuva.gem.offset = bo_offset_or_userptr;
856
857                         err = mmu_interval_notifier_insert(&vma->userptr.notifier,
858                                                            current->mm,
859                                                            xe_vma_userptr(vma), size,
860                                                            &vma_userptr_notifier_ops);
861                         if (err) {
862                                 kfree(vma);
863                                 vma = ERR_PTR(err);
864                                 return vma;
865                         }
866
867                         vma->userptr.notifier_seq = LONG_MAX;
868                 }
869
870                 xe_vm_get(vm);
871         }
872
873         return vma;
874 }
875
876 static void xe_vma_destroy_late(struct xe_vma *vma)
877 {
878         struct xe_vm *vm = xe_vma_vm(vma);
879         struct xe_device *xe = vm->xe;
880         bool read_only = xe_vma_read_only(vma);
881
882         if (xe_vma_is_userptr(vma)) {
883                 if (vma->userptr.sg) {
884                         dma_unmap_sgtable(xe->drm.dev,
885                                           vma->userptr.sg,
886                                           read_only ? DMA_TO_DEVICE :
887                                           DMA_BIDIRECTIONAL, 0);
888                         sg_free_table(vma->userptr.sg);
889                         vma->userptr.sg = NULL;
890                 }
891
892                 /*
893                  * Since userptr pages are not pinned, we can't remove
894                  * the notifier until we're sure the GPU is not accessing
895                  * them anymore
896                  */
897                 mmu_interval_notifier_remove(&vma->userptr.notifier);
898                 xe_vm_put(vm);
899         } else if (xe_vma_is_null(vma)) {
900                 xe_vm_put(vm);
901         } else {
902                 xe_bo_put(xe_vma_bo(vma));
903         }
904
905         kfree(vma);
906 }
907
908 static void vma_destroy_work_func(struct work_struct *w)
909 {
910         struct xe_vma *vma =
911                 container_of(w, struct xe_vma, destroy_work);
912
913         xe_vma_destroy_late(vma);
914 }
915
916 static void vma_destroy_cb(struct dma_fence *fence,
917                            struct dma_fence_cb *cb)
918 {
919         struct xe_vma *vma = container_of(cb, struct xe_vma, destroy_cb);
920
921         INIT_WORK(&vma->destroy_work, vma_destroy_work_func);
922         queue_work(system_unbound_wq, &vma->destroy_work);
923 }
924
925 static void xe_vma_destroy(struct xe_vma *vma, struct dma_fence *fence)
926 {
927         struct xe_vm *vm = xe_vma_vm(vma);
928
929         lockdep_assert_held_write(&vm->lock);
930         xe_assert(vm->xe, list_empty(&vma->combined_links.destroy));
931
932         if (xe_vma_is_userptr(vma)) {
933                 xe_assert(vm->xe, vma->gpuva.flags & XE_VMA_DESTROYED);
934
935                 spin_lock(&vm->userptr.invalidated_lock);
936                 list_del(&vma->userptr.invalidate_link);
937                 spin_unlock(&vm->userptr.invalidated_lock);
938         } else if (!xe_vma_is_null(vma)) {
939                 xe_bo_assert_held(xe_vma_bo(vma));
940
941                 drm_gpuva_unlink(&vma->gpuva);
942         }
943
944         xe_vm_assert_held(vm);
945         if (fence) {
946                 int ret = dma_fence_add_callback(fence, &vma->destroy_cb,
947                                                  vma_destroy_cb);
948
949                 if (ret) {
950                         XE_WARN_ON(ret != -ENOENT);
951                         xe_vma_destroy_late(vma);
952                 }
953         } else {
954                 xe_vma_destroy_late(vma);
955         }
956 }
957
958 /**
959  * xe_vm_prepare_vma() - drm_exec utility to lock a vma
960  * @exec: The drm_exec object we're currently locking for.
961  * @vma: The vma for witch we want to lock the vm resv and any attached
962  * object's resv.
963  * @num_shared: The number of dma-fence slots to pre-allocate in the
964  * objects' reservation objects.
965  *
966  * Return: 0 on success, negative error code on error. In particular
967  * may return -EDEADLK on WW transaction contention and -EINTR if
968  * an interruptible wait is terminated by a signal.
969  */
970 int xe_vm_prepare_vma(struct drm_exec *exec, struct xe_vma *vma,
971                       unsigned int num_shared)
972 {
973         struct xe_vm *vm = xe_vma_vm(vma);
974         struct xe_bo *bo = xe_vma_bo(vma);
975         int err;
976
977         XE_WARN_ON(!vm);
978         err = drm_exec_prepare_obj(exec, xe_vm_obj(vm), num_shared);
979         if (!err && bo && !bo->vm)
980                 err = drm_exec_prepare_obj(exec, &bo->ttm.base, num_shared);
981
982         return err;
983 }
984
985 static void xe_vma_destroy_unlocked(struct xe_vma *vma)
986 {
987         struct drm_exec exec;
988         int err;
989
990         drm_exec_init(&exec, 0, 0);
991         drm_exec_until_all_locked(&exec) {
992                 err = xe_vm_prepare_vma(&exec, vma, 0);
993                 drm_exec_retry_on_contention(&exec);
994                 if (XE_WARN_ON(err))
995                         break;
996         }
997
998         xe_vma_destroy(vma, NULL);
999
1000         drm_exec_fini(&exec);
1001 }
1002
1003 struct xe_vma *
1004 xe_vm_find_overlapping_vma(struct xe_vm *vm, u64 start, u64 range)
1005 {
1006         struct drm_gpuva *gpuva;
1007
1008         lockdep_assert_held(&vm->lock);
1009
1010         if (xe_vm_is_closed_or_banned(vm))
1011                 return NULL;
1012
1013         xe_assert(vm->xe, start + range <= vm->size);
1014
1015         gpuva = drm_gpuva_find_first(&vm->gpuvm, start, range);
1016
1017         return gpuva ? gpuva_to_vma(gpuva) : NULL;
1018 }
1019
1020 static int xe_vm_insert_vma(struct xe_vm *vm, struct xe_vma *vma)
1021 {
1022         int err;
1023
1024         xe_assert(vm->xe, xe_vma_vm(vma) == vm);
1025         lockdep_assert_held(&vm->lock);
1026
1027         err = drm_gpuva_insert(&vm->gpuvm, &vma->gpuva);
1028         XE_WARN_ON(err);        /* Shouldn't be possible */
1029
1030         return err;
1031 }
1032
1033 static void xe_vm_remove_vma(struct xe_vm *vm, struct xe_vma *vma)
1034 {
1035         xe_assert(vm->xe, xe_vma_vm(vma) == vm);
1036         lockdep_assert_held(&vm->lock);
1037
1038         drm_gpuva_remove(&vma->gpuva);
1039         if (vm->usm.last_fault_vma == vma)
1040                 vm->usm.last_fault_vma = NULL;
1041 }
1042
1043 static struct drm_gpuva_op *xe_vm_op_alloc(void)
1044 {
1045         struct xe_vma_op *op;
1046
1047         op = kzalloc(sizeof(*op), GFP_KERNEL);
1048
1049         if (unlikely(!op))
1050                 return NULL;
1051
1052         return &op->base;
1053 }
1054
1055 static void xe_vm_free(struct drm_gpuvm *gpuvm);
1056
1057 static const struct drm_gpuvm_ops gpuvm_ops = {
1058         .op_alloc = xe_vm_op_alloc,
1059         .vm_bo_validate = xe_gpuvm_validate,
1060         .vm_free = xe_vm_free,
1061 };
1062
1063 static u64 pde_encode_pat_index(struct xe_device *xe, u16 pat_index)
1064 {
1065         u64 pte = 0;
1066
1067         if (pat_index & BIT(0))
1068                 pte |= XE_PPGTT_PTE_PAT0;
1069
1070         if (pat_index & BIT(1))
1071                 pte |= XE_PPGTT_PTE_PAT1;
1072
1073         return pte;
1074 }
1075
1076 static u64 pte_encode_pat_index(struct xe_device *xe, u16 pat_index,
1077                                 u32 pt_level)
1078 {
1079         u64 pte = 0;
1080
1081         if (pat_index & BIT(0))
1082                 pte |= XE_PPGTT_PTE_PAT0;
1083
1084         if (pat_index & BIT(1))
1085                 pte |= XE_PPGTT_PTE_PAT1;
1086
1087         if (pat_index & BIT(2)) {
1088                 if (pt_level)
1089                         pte |= XE_PPGTT_PDE_PDPE_PAT2;
1090                 else
1091                         pte |= XE_PPGTT_PTE_PAT2;
1092         }
1093
1094         if (pat_index & BIT(3))
1095                 pte |= XELPG_PPGTT_PTE_PAT3;
1096
1097         if (pat_index & (BIT(4)))
1098                 pte |= XE2_PPGTT_PTE_PAT4;
1099
1100         return pte;
1101 }
1102
1103 static u64 pte_encode_ps(u32 pt_level)
1104 {
1105         XE_WARN_ON(pt_level > MAX_HUGEPTE_LEVEL);
1106
1107         if (pt_level == 1)
1108                 return XE_PDE_PS_2M;
1109         else if (pt_level == 2)
1110                 return XE_PDPE_PS_1G;
1111
1112         return 0;
1113 }
1114
1115 static u64 xelp_pde_encode_bo(struct xe_bo *bo, u64 bo_offset,
1116                               const u16 pat_index)
1117 {
1118         struct xe_device *xe = xe_bo_device(bo);
1119         u64 pde;
1120
1121         pde = xe_bo_addr(bo, bo_offset, XE_PAGE_SIZE);
1122         pde |= XE_PAGE_PRESENT | XE_PAGE_RW;
1123         pde |= pde_encode_pat_index(xe, pat_index);
1124
1125         return pde;
1126 }
1127
1128 static u64 xelp_pte_encode_bo(struct xe_bo *bo, u64 bo_offset,
1129                               u16 pat_index, u32 pt_level)
1130 {
1131         struct xe_device *xe = xe_bo_device(bo);
1132         u64 pte;
1133
1134         pte = xe_bo_addr(bo, bo_offset, XE_PAGE_SIZE);
1135         pte |= XE_PAGE_PRESENT | XE_PAGE_RW;
1136         pte |= pte_encode_pat_index(xe, pat_index, pt_level);
1137         pte |= pte_encode_ps(pt_level);
1138
1139         if (xe_bo_is_vram(bo) || xe_bo_is_stolen_devmem(bo))
1140                 pte |= XE_PPGTT_PTE_DM;
1141
1142         return pte;
1143 }
1144
1145 static u64 xelp_pte_encode_vma(u64 pte, struct xe_vma *vma,
1146                                u16 pat_index, u32 pt_level)
1147 {
1148         struct xe_device *xe = xe_vma_vm(vma)->xe;
1149
1150         pte |= XE_PAGE_PRESENT;
1151
1152         if (likely(!xe_vma_read_only(vma)))
1153                 pte |= XE_PAGE_RW;
1154
1155         pte |= pte_encode_pat_index(xe, pat_index, pt_level);
1156         pte |= pte_encode_ps(pt_level);
1157
1158         if (unlikely(xe_vma_is_null(vma)))
1159                 pte |= XE_PTE_NULL;
1160
1161         return pte;
1162 }
1163
1164 static u64 xelp_pte_encode_addr(struct xe_device *xe, u64 addr,
1165                                 u16 pat_index,
1166                                 u32 pt_level, bool devmem, u64 flags)
1167 {
1168         u64 pte;
1169
1170         /* Avoid passing random bits directly as flags */
1171         xe_assert(xe, !(flags & ~XE_PTE_PS64));
1172
1173         pte = addr;
1174         pte |= XE_PAGE_PRESENT | XE_PAGE_RW;
1175         pte |= pte_encode_pat_index(xe, pat_index, pt_level);
1176         pte |= pte_encode_ps(pt_level);
1177
1178         if (devmem)
1179                 pte |= XE_PPGTT_PTE_DM;
1180
1181         pte |= flags;
1182
1183         return pte;
1184 }
1185
1186 static const struct xe_pt_ops xelp_pt_ops = {
1187         .pte_encode_bo = xelp_pte_encode_bo,
1188         .pte_encode_vma = xelp_pte_encode_vma,
1189         .pte_encode_addr = xelp_pte_encode_addr,
1190         .pde_encode_bo = xelp_pde_encode_bo,
1191 };
1192
1193 static void vm_destroy_work_func(struct work_struct *w);
1194
1195 /**
1196  * xe_vm_create_scratch() - Setup a scratch memory pagetable tree for the
1197  * given tile and vm.
1198  * @xe: xe device.
1199  * @tile: tile to set up for.
1200  * @vm: vm to set up for.
1201  *
1202  * Sets up a pagetable tree with one page-table per level and a single
1203  * leaf PTE. All pagetable entries point to the single page-table one
1204  * level below or, at MAX_HUGEPTE_LEVEL, to a NULL huge PTE that reads
1205  * back as 0 and turns writes into NOPs.
1206  *
1207  * Return: 0 on success, negative error code on error.
1208  */
1209 static int xe_vm_create_scratch(struct xe_device *xe, struct xe_tile *tile,
1210                                 struct xe_vm *vm)
1211 {
1212         u8 id = tile->id;
1213         int i;
1214
1215         for (i = MAX_HUGEPTE_LEVEL; i < vm->pt_root[id]->level; i++) {
1216                 vm->scratch_pt[id][i] = xe_pt_create(vm, tile, i);
1217                 if (IS_ERR(vm->scratch_pt[id][i]))
1218                         return PTR_ERR(vm->scratch_pt[id][i]);
1219
1220                 xe_pt_populate_empty(tile, vm, vm->scratch_pt[id][i]);
1221         }
1222
1223         return 0;
1224 }
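/*
 * Rough shape of the resulting tree (illustrative only) for a VM whose
 * page-table root is at level N:
 *
 *	pt_root[N] -> scratch_pt[N-1] -> ... -> scratch_pt[MAX_HUGEPTE_LEVEL]
 *
 * Every entry of a scratch page-table points at the single scratch
 * page-table one level below; at MAX_HUGEPTE_LEVEL the entries are NULL
 * huge PTEs, so unmapped GPU virtual addresses read back as zero and
 * writes to them are dropped.
 */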
1225
1226 static void xe_vm_free_scratch(struct xe_vm *vm)
1227 {
1228         struct xe_tile *tile;
1229         u8 id;
1230
1231         if (!xe_vm_has_scratch(vm))
1232                 return;
1233
1234         for_each_tile(tile, vm->xe, id) {
1235                 u32 i;
1236
1237                 if (!vm->pt_root[id])
1238                         continue;
1239
1240                 for (i = MAX_HUGEPTE_LEVEL; i < vm->pt_root[id]->level; ++i)
1241                         if (vm->scratch_pt[id][i])
1242                                 xe_pt_destroy(vm->scratch_pt[id][i], vm->flags, NULL);
1243         }
1244 }
1245
1246 struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags)
1247 {
1248         struct drm_gem_object *vm_resv_obj;
1249         struct xe_vm *vm;
1250         int err, number_tiles = 0;
1251         struct xe_tile *tile;
1252         u8 id;
1253
1254         vm = kzalloc(sizeof(*vm), GFP_KERNEL);
1255         if (!vm)
1256                 return ERR_PTR(-ENOMEM);
1257
1258         vm->xe = xe;
1259
1260         vm->size = 1ull << xe->info.va_bits;
1261
1262         vm->flags = flags;
1263
1264         init_rwsem(&vm->lock);
1265
1266         INIT_LIST_HEAD(&vm->rebind_list);
1267
1268         INIT_LIST_HEAD(&vm->userptr.repin_list);
1269         INIT_LIST_HEAD(&vm->userptr.invalidated);
1270         init_rwsem(&vm->userptr.notifier_lock);
1271         spin_lock_init(&vm->userptr.invalidated_lock);
1272
1273         INIT_WORK(&vm->destroy_work, vm_destroy_work_func);
1274
1275         INIT_LIST_HEAD(&vm->preempt.exec_queues);
1276         vm->preempt.min_run_period_ms = 10;     /* FIXME: Wire up to uAPI */
1277
1278         for_each_tile(tile, xe, id)
1279                 xe_range_fence_tree_init(&vm->rftree[id]);
1280
1281         vm->pt_ops = &xelp_pt_ops;
1282
1283         if (!(flags & XE_VM_FLAG_MIGRATION))
1284                 xe_device_mem_access_get(xe);
1285
1286         vm_resv_obj = drm_gpuvm_resv_object_alloc(&xe->drm);
1287         if (!vm_resv_obj) {
1288                 err = -ENOMEM;
1289                 goto err_no_resv;
1290         }
1291
1292         drm_gpuvm_init(&vm->gpuvm, "Xe VM", DRM_GPUVM_RESV_PROTECTED, &xe->drm,
1293                        vm_resv_obj, 0, vm->size, 0, 0, &gpuvm_ops);
1294
1295         drm_gem_object_put(vm_resv_obj);
1296
1297         err = dma_resv_lock_interruptible(xe_vm_resv(vm), NULL);
1298         if (err)
1299                 goto err_close;
1300
1301         if (IS_DGFX(xe) && xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K)
1302                 vm->flags |= XE_VM_FLAG_64K;
1303
1304         for_each_tile(tile, xe, id) {
1305                 if (flags & XE_VM_FLAG_MIGRATION &&
1306                     tile->id != XE_VM_FLAG_TILE_ID(flags))
1307                         continue;
1308
1309                 vm->pt_root[id] = xe_pt_create(vm, tile, xe->info.vm_max_level);
1310                 if (IS_ERR(vm->pt_root[id])) {
1311                         err = PTR_ERR(vm->pt_root[id]);
1312                         vm->pt_root[id] = NULL;
1313                         goto err_unlock_close;
1314                 }
1315         }
1316
1317         if (xe_vm_has_scratch(vm)) {
1318                 for_each_tile(tile, xe, id) {
1319                         if (!vm->pt_root[id])
1320                                 continue;
1321
1322                         err = xe_vm_create_scratch(xe, tile, vm);
1323                         if (err)
1324                                 goto err_unlock_close;
1325                 }
1326                 vm->batch_invalidate_tlb = true;
1327         }
1328
1329         if (flags & XE_VM_FLAG_LR_MODE) {
1330                 INIT_WORK(&vm->preempt.rebind_work, preempt_rebind_work_func);
1331                 vm->flags |= XE_VM_FLAG_LR_MODE;
1332                 vm->batch_invalidate_tlb = false;
1333         }
1334
1335         /* Fill pt_root after allocating scratch tables */
1336         for_each_tile(tile, xe, id) {
1337                 if (!vm->pt_root[id])
1338                         continue;
1339
1340                 xe_pt_populate_empty(tile, vm, vm->pt_root[id]);
1341         }
1342         dma_resv_unlock(xe_vm_resv(vm));
1343
1344         /* Kernel migration VM shouldn't have a circular loop.. */
1345         if (!(flags & XE_VM_FLAG_MIGRATION)) {
1346                 for_each_tile(tile, xe, id) {
1347                         struct xe_gt *gt = tile->primary_gt;
1348                         struct xe_vm *migrate_vm;
1349                         struct xe_exec_queue *q;
1350                         u32 create_flags = EXEC_QUEUE_FLAG_VM;
1351
1352                         if (!vm->pt_root[id])
1353                                 continue;
1354
1355                         migrate_vm = xe_migrate_get_vm(tile->migrate);
1356                         q = xe_exec_queue_create_class(xe, gt, migrate_vm,
1357                                                        XE_ENGINE_CLASS_COPY,
1358                                                        create_flags);
1359                         xe_vm_put(migrate_vm);
1360                         if (IS_ERR(q)) {
1361                                 err = PTR_ERR(q);
1362                                 goto err_close;
1363                         }
1364                         vm->q[id] = q;
1365                         number_tiles++;
1366                 }
1367         }
1368
1369         if (number_tiles > 1)
1370                 vm->composite_fence_ctx = dma_fence_context_alloc(1);
1371
1372         mutex_lock(&xe->usm.lock);
1373         if (flags & XE_VM_FLAG_FAULT_MODE)
1374                 xe->usm.num_vm_in_fault_mode++;
1375         else if (!(flags & XE_VM_FLAG_MIGRATION))
1376                 xe->usm.num_vm_in_non_fault_mode++;
1377         mutex_unlock(&xe->usm.lock);
1378
1379         trace_xe_vm_create(vm);
1380
1381         return vm;
1382
1383 err_unlock_close:
1384         dma_resv_unlock(xe_vm_resv(vm));
1385 err_close:
1386         xe_vm_close_and_put(vm);
1387         return ERR_PTR(err);
1388
1389 err_no_resv:
1390         for_each_tile(tile, xe, id)
1391                 xe_range_fence_tree_fini(&vm->rftree[id]);
1392         kfree(vm);
1393         if (!(flags & XE_VM_FLAG_MIGRATION))
1394                 xe_device_mem_access_put(xe);
1395         return ERR_PTR(err);
1396 }
1397
1398 static void xe_vm_close(struct xe_vm *vm)
1399 {
1400         down_write(&vm->lock);
1401         vm->size = 0;
1402         up_write(&vm->lock);
1403 }
1404
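/**
 * xe_vm_close_and_put() - Tear down a VM and drop its creation reference
 * @vm: The VM.
 *
 * Mark the VM as closed, flush the rebind worker if needed, kill and
 * release the per-tile kernel exec queues, destroy all vmas, scratch
 * tables and page-table roots, and finally drop the reference taken at
 * creation time. Must not be called while compute exec queues are still
 * attached to the VM.
 */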
1405 void xe_vm_close_and_put(struct xe_vm *vm)
1406 {
1407         LIST_HEAD(contested);
1408         struct xe_device *xe = vm->xe;
1409         struct xe_tile *tile;
1410         struct xe_vma *vma, *next_vma;
1411         struct drm_gpuva *gpuva, *next;
1412         u8 id;
1413
1414         xe_assert(xe, !vm->preempt.num_exec_queues);
1415
1416         xe_vm_close(vm);
1417         if (xe_vm_in_preempt_fence_mode(vm))
1418                 flush_work(&vm->preempt.rebind_work);
1419
1420         down_write(&vm->lock);
1421         for_each_tile(tile, xe, id) {
1422                 if (vm->q[id])
1423                         xe_exec_queue_last_fence_put(vm->q[id], vm);
1424         }
1425         up_write(&vm->lock);
1426
1427         for_each_tile(tile, xe, id) {
1428                 if (vm->q[id]) {
1429                         xe_exec_queue_kill(vm->q[id]);
1430                         xe_exec_queue_put(vm->q[id]);
1431                         vm->q[id] = NULL;
1432                 }
1433         }
1434
1435         down_write(&vm->lock);
1436         xe_vm_lock(vm, false);
1437         drm_gpuvm_for_each_va_safe(gpuva, next, &vm->gpuvm) {
1438                 vma = gpuva_to_vma(gpuva);
1439
1440                 if (xe_vma_has_no_bo(vma)) {
1441                         down_read(&vm->userptr.notifier_lock);
1442                         vma->gpuva.flags |= XE_VMA_DESTROYED;
1443                         up_read(&vm->userptr.notifier_lock);
1444                 }
1445
1446                 xe_vm_remove_vma(vm, vma);
1447
1448                 /* easy case, remove from VMA? */
1449                 if (xe_vma_has_no_bo(vma) || xe_vma_bo(vma)->vm) {
1450                         list_del_init(&vma->combined_links.rebind);
1451                         xe_vma_destroy(vma, NULL);
1452                         continue;
1453                 }
1454
1455                 list_move_tail(&vma->combined_links.destroy, &contested);
1456                 vma->gpuva.flags |= XE_VMA_DESTROYED;
1457         }
1458
1459         /*
1460          * All vm operations will add shared fences to resv.
1461          * The only exception is eviction for a shared object,
1462          * but even so, the unbind when evicted would still
1463          * install a fence to resv. Hence it's safe to
1464          * destroy the pagetables immediately.
1465          */
1466         xe_vm_free_scratch(vm);
1467
1468         for_each_tile(tile, xe, id) {
1469                 if (vm->pt_root[id]) {
1470                         xe_pt_destroy(vm->pt_root[id], vm->flags, NULL);
1471                         vm->pt_root[id] = NULL;
1472                 }
1473         }
1474         xe_vm_unlock(vm);
1475
1476         /*
1477          * VM is now dead, cannot re-add nodes to vm->vmas if it's NULL
1478          * Since we hold a refcount to the bo, we can remove and free
1479          * the members safely without locking.
1480          */
1481         list_for_each_entry_safe(vma, next_vma, &contested,
1482                                  combined_links.destroy) {
1483                 list_del_init(&vma->combined_links.destroy);
1484                 xe_vma_destroy_unlocked(vma);
1485         }
1486
1487         up_write(&vm->lock);
1488
1489         mutex_lock(&xe->usm.lock);
1490         if (vm->flags & XE_VM_FLAG_FAULT_MODE)
1491                 xe->usm.num_vm_in_fault_mode--;
1492         else if (!(vm->flags & XE_VM_FLAG_MIGRATION))
1493                 xe->usm.num_vm_in_non_fault_mode--;
1494         mutex_unlock(&xe->usm.lock);
1495
1496         for_each_tile(tile, xe, id)
1497                 xe_range_fence_tree_fini(&vm->rftree[id]);
1498
1499         xe_vm_put(vm);
1500 }
1501
1502 static void vm_destroy_work_func(struct work_struct *w)
1503 {
1504         struct xe_vm *vm =
1505                 container_of(w, struct xe_vm, destroy_work);
1506         struct xe_device *xe = vm->xe;
1507         struct xe_tile *tile;
1508         u8 id;
1509         void *lookup;
1510
1511         /* xe_vm_close_and_put was not called? */
1512         xe_assert(xe, !vm->size);
1513
1514         if (!(vm->flags & XE_VM_FLAG_MIGRATION)) {
1515                 xe_device_mem_access_put(xe);
1516
1517                 if (xe->info.has_asid && vm->usm.asid) {
1518                         mutex_lock(&xe->usm.lock);
1519                         lookup = xa_erase(&xe->usm.asid_to_vm, vm->usm.asid);
1520                         xe_assert(xe, lookup == vm);
1521                         mutex_unlock(&xe->usm.lock);
1522                 }
1523         }
1524
1525         for_each_tile(tile, xe, id)
1526                 XE_WARN_ON(vm->pt_root[id]);
1527
1528         trace_xe_vm_free(vm);
1529         dma_fence_put(vm->rebind_fence);
1530         kfree(vm);
1531 }
1532
1533 static void xe_vm_free(struct drm_gpuvm *gpuvm)
1534 {
1535         struct xe_vm *vm = container_of(gpuvm, struct xe_vm, gpuvm);
1536
1537         /* To destroy the VM we need to be able to sleep */
1538         queue_work(system_unbound_wq, &vm->destroy_work);
1539 }
1540
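/**
 * xe_vm_lookup() - Look up a VM by its user handle
 * @xef: The file from which the handle was obtained.
 * @id: The VM handle.
 *
 * Return: A referenced pointer to the VM on success, NULL if no VM with
 * that handle exists. The caller is responsible for dropping the
 * reference with xe_vm_put().
 */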
1541 struct xe_vm *xe_vm_lookup(struct xe_file *xef, u32 id)
1542 {
1543         struct xe_vm *vm;
1544
1545         mutex_lock(&xef->vm.lock);
1546         vm = xa_load(&xef->vm.xa, id);
1547         if (vm)
1548                 xe_vm_get(vm);
1549         mutex_unlock(&xef->vm.lock);
1550
1551         return vm;
1552 }
1553
1554 u64 xe_vm_pdp4_descriptor(struct xe_vm *vm, struct xe_tile *tile)
1555 {
1556         return vm->pt_ops->pde_encode_bo(vm->pt_root[tile->id]->bo, 0,
1557                                          tile_to_xe(tile)->pat.idx[XE_CACHE_WB]);
1558 }
1559
1560 static struct xe_exec_queue *
1561 to_wait_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q)
1562 {
1563         return q ? q : vm->q[0];
1564 }
1565
1566 static struct dma_fence *
1567 xe_vm_unbind_vma(struct xe_vma *vma, struct xe_exec_queue *q,
1568                  struct xe_sync_entry *syncs, u32 num_syncs,
1569                  bool first_op, bool last_op)
1570 {
1571         struct xe_vm *vm = xe_vma_vm(vma);
1572         struct xe_exec_queue *wait_exec_queue = to_wait_exec_queue(vm, q);
1573         struct xe_tile *tile;
1574         struct dma_fence *fence = NULL;
1575         struct dma_fence **fences = NULL;
1576         struct dma_fence_array *cf = NULL;
1577         int cur_fence = 0, i;
1578         int number_tiles = hweight8(vma->tile_present);
1579         int err;
1580         u8 id;
1581
1582         trace_xe_vma_unbind(vma);
1583
1584         if (number_tiles > 1) {
1585                 fences = kmalloc_array(number_tiles, sizeof(*fences),
1586                                        GFP_KERNEL);
1587                 if (!fences)
1588                         return ERR_PTR(-ENOMEM);
1589         }
1590
1591         for_each_tile(tile, vm->xe, id) {
1592                 if (!(vma->tile_present & BIT(id)))
1593                         goto next;
1594
1595                 fence = __xe_pt_unbind_vma(tile, vma, q ? q : vm->q[id],
1596                                            first_op ? syncs : NULL,
1597                                            first_op ? num_syncs : 0);
1598                 if (IS_ERR(fence)) {
1599                         err = PTR_ERR(fence);
1600                         goto err_fences;
1601                 }
1602
1603                 if (fences)
1604                         fences[cur_fence++] = fence;
1605
1606 next:
1607                 if (q && vm->pt_root[id] && !list_empty(&q->multi_gt_list))
1608                         q = list_next_entry(q, multi_gt_list);
1609         }
1610
1611         if (fences) {
1612                 cf = dma_fence_array_create(number_tiles, fences,
1613                                             vm->composite_fence_ctx,
1614                                             vm->composite_fence_seqno++,
1615                                             false);
1616                 if (!cf) {
1617                         --vm->composite_fence_seqno;
1618                         err = -ENOMEM;
1619                         goto err_fences;
1620                 }
1621         }
1622
1623         fence = cf ? &cf->base : !fence ?
1624                 xe_exec_queue_last_fence_get(wait_exec_queue, vm) : fence;
1625         if (last_op) {
1626                 for (i = 0; i < num_syncs; i++)
1627                         xe_sync_entry_signal(&syncs[i], NULL, fence);
1628         }
1629
1630         return fence;
1631
1632 err_fences:
1633         if (fences) {
1634                 while (cur_fence)
1635                         dma_fence_put(fences[--cur_fence]);
1636                 kfree(fences);
1637         }
1638
1639         return ERR_PTR(err);
1640 }
1641
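/*
 * Bind a VMA on every tile in its tile_mask. Mirrors xe_vm_unbind_vma(): one
 * __xe_pt_bind_vma() job per tile, with a composite dma_fence_array returned
 * when multiple tiles are programmed. Syncs are attached only to the first
 * operation and signalled on the last one.
 */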
1642 static struct dma_fence *
1643 xe_vm_bind_vma(struct xe_vma *vma, struct xe_exec_queue *q,
1644                struct xe_sync_entry *syncs, u32 num_syncs,
1645                bool first_op, bool last_op)
1646 {
1647         struct xe_tile *tile;
1648         struct dma_fence *fence;
1649         struct dma_fence **fences = NULL;
1650         struct dma_fence_array *cf = NULL;
1651         struct xe_vm *vm = xe_vma_vm(vma);
1652         int cur_fence = 0, i;
1653         int number_tiles = hweight8(vma->tile_mask);
1654         int err;
1655         u8 id;
1656
1657         trace_xe_vma_bind(vma);
1658
1659         if (number_tiles > 1) {
1660                 fences = kmalloc_array(number_tiles, sizeof(*fences),
1661                                        GFP_KERNEL);
1662                 if (!fences)
1663                         return ERR_PTR(-ENOMEM);
1664         }
1665
1666         for_each_tile(tile, vm->xe, id) {
1667                 if (!(vma->tile_mask & BIT(id)))
1668                         goto next;
1669
1670                 fence = __xe_pt_bind_vma(tile, vma, q ? q : vm->q[id],
1671                                          first_op ? syncs : NULL,
1672                                          first_op ? num_syncs : 0,
1673                                          vma->tile_present & BIT(id));
1674                 if (IS_ERR(fence)) {
1675                         err = PTR_ERR(fence);
1676                         goto err_fences;
1677                 }
1678
1679                 if (fences)
1680                         fences[cur_fence++] = fence;
1681
1682 next:
1683                 if (q && vm->pt_root[id] && !list_empty(&q->multi_gt_list))
1684                         q = list_next_entry(q, multi_gt_list);
1685         }
1686
1687         if (fences) {
1688                 cf = dma_fence_array_create(number_tiles, fences,
1689                                             vm->composite_fence_ctx,
1690                                             vm->composite_fence_seqno++,
1691                                             false);
1692                 if (!cf) {
1693                         --vm->composite_fence_seqno;
1694                         err = -ENOMEM;
1695                         goto err_fences;
1696                 }
1697         }
1698
1699         if (last_op) {
1700                 for (i = 0; i < num_syncs; i++)
1701                         xe_sync_entry_signal(&syncs[i], NULL,
1702                                              cf ? &cf->base : fence);
1703         }
1704
1705         return cf ? &cf->base : fence;
1706
1707 err_fences:
1708         if (fences) {
1709                 while (cur_fence)
1710                         dma_fence_put(fences[--cur_fence]);
1711                 kfree(fences);
1712         }
1713
1714         return ERR_PTR(err);
1715 }
1716
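/*
 * Common bind helper: issue the bind now when @immediate is set; otherwise
 * (only legal for VMs in fault mode) skip the bind and simply signal the
 * syncs against the wait exec queue's last fence. In either case the wait
 * exec queue's last fence is updated for the final operation.
 */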
1717 static int __xe_vm_bind(struct xe_vm *vm, struct xe_vma *vma,
1718                         struct xe_exec_queue *q, struct xe_sync_entry *syncs,
1719                         u32 num_syncs, bool immediate, bool first_op,
1720                         bool last_op)
1721 {
1722         struct dma_fence *fence;
1723         struct xe_exec_queue *wait_exec_queue = to_wait_exec_queue(vm, q);
1724
1725         xe_vm_assert_held(vm);
1726
1727         if (immediate) {
1728                 fence = xe_vm_bind_vma(vma, q, syncs, num_syncs, first_op,
1729                                        last_op);
1730                 if (IS_ERR(fence))
1731                         return PTR_ERR(fence);
1732         } else {
1733                 int i;
1734
1735                 xe_assert(vm->xe, xe_vm_in_fault_mode(vm));
1736
1737                 fence = xe_exec_queue_last_fence_get(wait_exec_queue, vm);
1738                 if (last_op) {
1739                         for (i = 0; i < num_syncs; i++)
1740                                 xe_sync_entry_signal(&syncs[i], NULL, fence);
1741                 }
1742         }
1743
1744         if (last_op)
1745                 xe_exec_queue_last_fence_set(wait_exec_queue, vm, fence);
1746         dma_fence_put(fence);
1747
1748         return 0;
1749 }
1750
1751 static int xe_vm_bind(struct xe_vm *vm, struct xe_vma *vma, struct xe_exec_queue *q,
1752                       struct xe_bo *bo, struct xe_sync_entry *syncs,
1753                       u32 num_syncs, bool immediate, bool first_op,
1754                       bool last_op)
1755 {
1756         int err;
1757
1758         xe_vm_assert_held(vm);
1759         xe_bo_assert_held(bo);
1760
1761         if (bo && immediate) {
1762                 err = xe_bo_validate(bo, vm, true);
1763                 if (err)
1764                         return err;
1765         }
1766
1767         return __xe_vm_bind(vm, vma, q, syncs, num_syncs, immediate, first_op,
1768                             last_op);
1769 }
1770
1771 static int xe_vm_unbind(struct xe_vm *vm, struct xe_vma *vma,
1772                         struct xe_exec_queue *q, struct xe_sync_entry *syncs,
1773                         u32 num_syncs, bool first_op, bool last_op)
1774 {
1775         struct dma_fence *fence;
1776         struct xe_exec_queue *wait_exec_queue = to_wait_exec_queue(vm, q);
1777
1778         xe_vm_assert_held(vm);
1779         xe_bo_assert_held(xe_vma_bo(vma));
1780
1781         fence = xe_vm_unbind_vma(vma, q, syncs, num_syncs, first_op, last_op);
1782         if (IS_ERR(fence))
1783                 return PTR_ERR(fence);
1784
1785         xe_vma_destroy(vma, fence);
1786         if (last_op)
1787                 xe_exec_queue_last_fence_set(wait_exec_queue, vm, fence);
1788         dma_fence_put(fence);
1789
1790         return 0;
1791 }
1792
1793 #define ALL_DRM_XE_VM_CREATE_FLAGS (DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE | \
1794                                     DRM_XE_VM_CREATE_FLAG_LR_MODE | \
1795                                     DRM_XE_VM_CREATE_FLAG_FAULT_MODE)
1796
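/*
 * VM create ioctl: validate the creation flags (scratch page, LR mode, fault
 * mode), create the VM, publish it in the file's VM xarray and, on devices
 * with ASID support, allocate an ASID used to look the VM up on page faults.
 *
 * Minimal userspace usage sketch (illustrative only; fd is an open render
 * node for an xe device and error handling is elided):
 *
 *	struct drm_xe_vm_create create = {
 *		.flags = DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE,
 *	};
 *
 *	ioctl(fd, DRM_IOCTL_XE_VM_CREATE, &create);
 *
 * On success, create.vm_id refers to the new VM.
 */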
1797 int xe_vm_create_ioctl(struct drm_device *dev, void *data,
1798                        struct drm_file *file)
1799 {
1800         struct xe_device *xe = to_xe_device(dev);
1801         struct xe_file *xef = to_xe_file(file);
1802         struct drm_xe_vm_create *args = data;
1803         struct xe_tile *tile;
1804         struct xe_vm *vm;
1805         u32 id, asid;
1806         int err;
1807         u32 flags = 0;
1808
1809         if (XE_IOCTL_DBG(xe, args->extensions))
1810                 return -EINVAL;
1811
1812         if (XE_WA(xe_root_mmio_gt(xe), 14016763929))
1813                 args->flags |= DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE;
1814
1815         if (XE_IOCTL_DBG(xe, args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE &&
1816                          !xe->info.has_usm))
1817                 return -EINVAL;
1818
1819         if (XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
1820                 return -EINVAL;
1821
1822         if (XE_IOCTL_DBG(xe, args->flags & ~ALL_DRM_XE_VM_CREATE_FLAGS))
1823                 return -EINVAL;
1824
1825         if (XE_IOCTL_DBG(xe, args->flags & DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE &&
1826                          args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE))
1827                 return -EINVAL;
1828
1829         if (XE_IOCTL_DBG(xe, !(args->flags & DRM_XE_VM_CREATE_FLAG_LR_MODE) &&
1830                          args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE))
1831                 return -EINVAL;
1832
1833         if (XE_IOCTL_DBG(xe, args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE &&
1834                          xe_device_in_non_fault_mode(xe)))
1835                 return -EINVAL;
1836
1837         if (XE_IOCTL_DBG(xe, !(args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE) &&
1838                          xe_device_in_fault_mode(xe)))
1839                 return -EINVAL;
1840
1844         if (args->flags & DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE)
1845                 flags |= XE_VM_FLAG_SCRATCH_PAGE;
1846         if (args->flags & DRM_XE_VM_CREATE_FLAG_LR_MODE)
1847                 flags |= XE_VM_FLAG_LR_MODE;
1848         if (args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE)
1849                 flags |= XE_VM_FLAG_FAULT_MODE;
1850
1851         vm = xe_vm_create(xe, flags);
1852         if (IS_ERR(vm))
1853                 return PTR_ERR(vm);
1854
1855         mutex_lock(&xef->vm.lock);
1856         err = xa_alloc(&xef->vm.xa, &id, vm, xa_limit_32b, GFP_KERNEL);
1857         mutex_unlock(&xef->vm.lock);
1858         if (err)
1859                 goto err_close_and_put;
1860
1861         if (xe->info.has_asid) {
1862                 mutex_lock(&xe->usm.lock);
1863                 err = xa_alloc_cyclic(&xe->usm.asid_to_vm, &asid, vm,
1864                                       XA_LIMIT(1, XE_MAX_ASID - 1),
1865                                       &xe->usm.next_asid, GFP_KERNEL);
1866                 mutex_unlock(&xe->usm.lock);
1867                 if (err < 0)
1868                         goto err_free_id;
1869
1870                 vm->usm.asid = asid;
1871         }
1872
1873         args->vm_id = id;
1874         vm->xef = xef;
1875
1876         /* Record the pagetable BOs against the client that created the VM */
1877         for_each_tile(tile, xe, id)
1878                 if (vm->pt_root[id])
1879                         xe_drm_client_add_bo(vm->xef->client, vm->pt_root[id]->bo);
1880
1881 #if IS_ENABLED(CONFIG_DRM_XE_DEBUG_MEM)
1882         /* Warning: Security issue - never enable by default */
1883         args->reserved[0] = xe_bo_main_addr(vm->pt_root[0]->bo, XE_PAGE_SIZE);
1884 #endif
1885
1886         return 0;
1887
1888 err_free_id:
1889         mutex_lock(&xef->vm.lock);
1890         xa_erase(&xef->vm.xa, id);
1891         mutex_unlock(&xef->vm.lock);
1892 err_close_and_put:
1893         xe_vm_close_and_put(vm);
1894
1895         return err;
1896 }
1897
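/*
 * VM destroy ioctl: drop the id from the file's VM xarray and close/put the
 * VM, refusing with -EBUSY while preempt-fence exec queues are still attached.
 */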
1898 int xe_vm_destroy_ioctl(struct drm_device *dev, void *data,
1899                         struct drm_file *file)
1900 {
1901         struct xe_device *xe = to_xe_device(dev);
1902         struct xe_file *xef = to_xe_file(file);
1903         struct drm_xe_vm_destroy *args = data;
1904         struct xe_vm *vm;
1905         int err = 0;
1906
1907         if (XE_IOCTL_DBG(xe, args->pad) ||
1908             XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
1909                 return -EINVAL;
1910
1911         mutex_lock(&xef->vm.lock);
1912         vm = xa_load(&xef->vm.xa, args->vm_id);
1913         if (XE_IOCTL_DBG(xe, !vm))
1914                 err = -ENOENT;
1915         else if (XE_IOCTL_DBG(xe, vm->preempt.num_exec_queues))
1916                 err = -EBUSY;
1917         else
1918                 xa_erase(&xef->vm.xa, args->vm_id);
1919         mutex_unlock(&xef->vm.lock);
1920
1921         if (!err)
1922                 xe_vm_close_and_put(vm);
1923
1924         return err;
1925 }
1926
1927 static const u32 region_to_mem_type[] = {
1928         XE_PL_TT,
1929         XE_PL_VRAM0,
1930         XE_PL_VRAM1,
1931 };
1932
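/*
 * Prefetch: migrate the backing BO to the requested memory region (TT or one
 * of the VRAM instances) and rebind the VMA only when some tile is missing a
 * valid binding; otherwise there is nothing to do and the syncs are signalled
 * immediately.
 */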
1933 static int xe_vm_prefetch(struct xe_vm *vm, struct xe_vma *vma,
1934                           struct xe_exec_queue *q, u32 region,
1935                           struct xe_sync_entry *syncs, u32 num_syncs,
1936                           bool first_op, bool last_op)
1937 {
1938         struct xe_exec_queue *wait_exec_queue = to_wait_exec_queue(vm, q);
1939         int err;
1940
1941         xe_assert(vm->xe, region < ARRAY_SIZE(region_to_mem_type));
1942
1943         if (!xe_vma_has_no_bo(vma)) {
1944                 err = xe_bo_migrate(xe_vma_bo(vma), region_to_mem_type[region]);
1945                 if (err)
1946                         return err;
1947         }
1948
1949         if (vma->tile_mask != (vma->tile_present & ~vma->usm.tile_invalidated)) {
1950                 return xe_vm_bind(vm, vma, q, xe_vma_bo(vma), syncs, num_syncs,
1951                                   true, first_op, last_op);
1952         } else {
1953                 int i;
1954
1955                 /* Nothing to do, signal fences now */
1956                 if (last_op) {
1957                         for (i = 0; i < num_syncs; i++) {
1958                                 struct dma_fence *fence =
1959                                         xe_exec_queue_last_fence_get(wait_exec_queue, vm);
1960
1961                                 xe_sync_entry_signal(&syncs[i], NULL, fence);
1962                         }
1963                 }
1964
1965                 return 0;
1966         }
1967 }
1968
1969 static void prep_vma_destroy(struct xe_vm *vm, struct xe_vma *vma,
1970                              bool post_commit)
1971 {
1972         down_read(&vm->userptr.notifier_lock);
1973         vma->gpuva.flags |= XE_VMA_DESTROYED;
1974         up_read(&vm->userptr.notifier_lock);
1975         if (post_commit)
1976                 xe_vm_remove_vma(vm, vma);
1977 }
1978
1979 #undef ULL
1980 #define ULL     unsigned long long
1981
1982 #if IS_ENABLED(CONFIG_DRM_XE_DEBUG_VM)
1983 static void print_op(struct xe_device *xe, struct drm_gpuva_op *op)
1984 {
1985         struct xe_vma *vma;
1986
1987         switch (op->op) {
1988         case DRM_GPUVA_OP_MAP:
1989                 vm_dbg(&xe->drm, "MAP: addr=0x%016llx, range=0x%016llx",
1990                        (ULL)op->map.va.addr, (ULL)op->map.va.range);
1991                 break;
1992         case DRM_GPUVA_OP_REMAP:
1993                 vma = gpuva_to_vma(op->remap.unmap->va);
1994                 vm_dbg(&xe->drm, "REMAP:UNMAP: addr=0x%016llx, range=0x%016llx, keep=%d",
1995                        (ULL)xe_vma_start(vma), (ULL)xe_vma_size(vma),
1996                        op->remap.unmap->keep ? 1 : 0);
1997                 if (op->remap.prev)
1998                         vm_dbg(&xe->drm,
1999                                "REMAP:PREV: addr=0x%016llx, range=0x%016llx",
2000                                (ULL)op->remap.prev->va.addr,
2001                                (ULL)op->remap.prev->va.range);
2002                 if (op->remap.next)
2003                         vm_dbg(&xe->drm,
2004                                "REMAP:NEXT: addr=0x%016llx, range=0x%016llx",
2005                                (ULL)op->remap.next->va.addr,
2006                                (ULL)op->remap.next->va.range);
2007                 break;
2008         case DRM_GPUVA_OP_UNMAP:
2009                 vma = gpuva_to_vma(op->unmap.va);
2010                 vm_dbg(&xe->drm, "UNMAP: addr=0x%016llx, range=0x%016llx, keep=%d",
2011                        (ULL)xe_vma_start(vma), (ULL)xe_vma_size(vma),
2012                        op->unmap.keep ? 1 : 0);
2013                 break;
2014         case DRM_GPUVA_OP_PREFETCH:
2015                 vma = gpuva_to_vma(op->prefetch.va);
2016                 vm_dbg(&xe->drm, "PREFETCH: addr=0x%016llx, range=0x%016llx",
2017                        (ULL)xe_vma_start(vma), (ULL)xe_vma_size(vma));
2018                 break;
2019         default:
2020                 drm_warn(&xe->drm, "NOT POSSIBLE");
2021         }
2022 }
2023 #else
2024 static void print_op(struct xe_device *xe, struct drm_gpuva_op *op)
2025 {
2026 }
2027 #endif
2028
2029 /*
2030  * Create the operations list from IOCTL arguments and set up operation fields
2031  * so the parse and commit steps are decoupled from the IOCTL. This step can fail.
2032  */
2033 static struct drm_gpuva_ops *
2034 vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_bo *bo,
2035                          u64 bo_offset_or_userptr, u64 addr, u64 range,
2036                          u32 operation, u32 flags,
2037                          u32 prefetch_region, u16 pat_index)
2038 {
2039         struct drm_gem_object *obj = bo ? &bo->ttm.base : NULL;
2040         struct drm_gpuva_ops *ops;
2041         struct drm_gpuva_op *__op;
2042         struct xe_vma_op *op;
2043         struct drm_gpuvm_bo *vm_bo;
2044         int err;
2045
2046         lockdep_assert_held_write(&vm->lock);
2047
2048         vm_dbg(&vm->xe->drm,
2049                "op=%d, addr=0x%016llx, range=0x%016llx, bo_offset_or_userptr=0x%016llx",
2050                operation, (ULL)addr, (ULL)range,
2051                (ULL)bo_offset_or_userptr);
2052
2053         switch (operation) {
2054         case DRM_XE_VM_BIND_OP_MAP:
2055         case DRM_XE_VM_BIND_OP_MAP_USERPTR:
2056                 ops = drm_gpuvm_sm_map_ops_create(&vm->gpuvm, addr, range,
2057                                                   obj, bo_offset_or_userptr);
2058                 break;
2059         case DRM_XE_VM_BIND_OP_UNMAP:
2060                 ops = drm_gpuvm_sm_unmap_ops_create(&vm->gpuvm, addr, range);
2061                 break;
2062         case DRM_XE_VM_BIND_OP_PREFETCH:
2063                 ops = drm_gpuvm_prefetch_ops_create(&vm->gpuvm, addr, range);
2064                 break;
2065         case DRM_XE_VM_BIND_OP_UNMAP_ALL:
2066                 xe_assert(vm->xe, bo);
2067
2068                 err = xe_bo_lock(bo, true);
2069                 if (err)
2070                         return ERR_PTR(err);
2071
2072                 vm_bo = drm_gpuvm_bo_obtain(&vm->gpuvm, obj);
2073                 if (IS_ERR(vm_bo)) {
2074                         xe_bo_unlock(bo);
2075                         return ERR_CAST(vm_bo);
2076                 }
2077
2078                 ops = drm_gpuvm_bo_unmap_ops_create(vm_bo);
2079                 drm_gpuvm_bo_put(vm_bo);
2080                 xe_bo_unlock(bo);
2081                 break;
2082         default:
2083                 drm_warn(&vm->xe->drm, "NOT POSSIBLE");
2084                 ops = ERR_PTR(-EINVAL);
2085         }
2086         if (IS_ERR(ops))
2087                 return ops;
2088
2089 #ifdef TEST_VM_ASYNC_OPS_ERROR
2090         if (operation & FORCE_ASYNC_OP_ERROR) {
2091                 op = list_first_entry_or_null(&ops->list, struct xe_vma_op,
2092                                               base.entry);
2093                 if (op)
2094                         op->inject_error = true;
2095         }
2096 #endif
2097
2098         drm_gpuva_for_each_op(__op, ops) {
2099                 struct xe_vma_op *op = gpuva_op_to_vma_op(__op);
2100
2101                 if (__op->op == DRM_GPUVA_OP_MAP) {
2102                         op->map.immediate =
2103                                 flags & DRM_XE_VM_BIND_FLAG_IMMEDIATE;
2104                         op->map.read_only =
2105                                 flags & DRM_XE_VM_BIND_FLAG_READONLY;
2106                         op->map.is_null = flags & DRM_XE_VM_BIND_FLAG_NULL;
2107                         op->map.pat_index = pat_index;
2108                 } else if (__op->op == DRM_GPUVA_OP_PREFETCH) {
2109                         op->prefetch.region = prefetch_region;
2110                 }
2111
2112                 print_op(vm->xe, __op);
2113         }
2114
2115         return ops;
2116 }
2117
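/*
 * Create a VMA for a MAP (or REMAP prev/next) op. For BO-backed mappings the
 * VM's and/or the BO's reservation is taken via drm_exec before creation;
 * userptr VMAs get their pages pinned and external BOs get the VM's preempt
 * fences added to their reservation.
 */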
2118 static struct xe_vma *new_vma(struct xe_vm *vm, struct drm_gpuva_op_map *op,
2119                               u16 pat_index, unsigned int flags)
2120 {
2121         struct xe_bo *bo = op->gem.obj ? gem_to_xe_bo(op->gem.obj) : NULL;
2122         struct drm_exec exec;
2123         struct xe_vma *vma;
2124         int err;
2125
2126         lockdep_assert_held_write(&vm->lock);
2127
2128         if (bo) {
2129                 drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0);
2130                 drm_exec_until_all_locked(&exec) {
2131                         err = 0;
2132                         if (!bo->vm) {
2133                                 err = drm_exec_lock_obj(&exec, xe_vm_obj(vm));
2134                                 drm_exec_retry_on_contention(&exec);
2135                         }
2136                         if (!err) {
2137                                 err = drm_exec_lock_obj(&exec, &bo->ttm.base);
2138                                 drm_exec_retry_on_contention(&exec);
2139                         }
2140                         if (err) {
2141                                 drm_exec_fini(&exec);
2142                                 return ERR_PTR(err);
2143                         }
2144                 }
2145         }
2146         vma = xe_vma_create(vm, bo, op->gem.offset,
2147                             op->va.addr, op->va.addr +
2148                             op->va.range - 1, pat_index, flags);
2149         if (bo)
2150                 drm_exec_fini(&exec);
2151
2152         if (xe_vma_is_userptr(vma)) {
2153                 err = xe_vma_userptr_pin_pages(vma);
2154                 if (err) {
2155                         prep_vma_destroy(vm, vma, false);
2156                         xe_vma_destroy_unlocked(vma);
2157                         return ERR_PTR(err);
2158                 }
2159         } else if (!xe_vma_has_no_bo(vma) && !bo->vm) {
2160                 err = add_preempt_fences(vm, bo);
2161                 if (err) {
2162                         prep_vma_destroy(vm, vma, false);
2163                         xe_vma_destroy_unlocked(vma);
2164                         return ERR_PTR(err);
2165                 }
2166         }
2167
2168         return vma;
2169 }
2170
2171 static u64 xe_vma_max_pte_size(struct xe_vma *vma)
2172 {
2173         if (vma->gpuva.flags & XE_VMA_PTE_1G)
2174                 return SZ_1G;
2175         else if (vma->gpuva.flags & XE_VMA_PTE_2M)
2176                 return SZ_2M;
2177
2178         return SZ_4K;
2179 }
2180
2181 static u64 xe_vma_set_pte_size(struct xe_vma *vma, u64 size)
2182 {
2183         switch (size) {
2184         case SZ_1G:
2185                 vma->gpuva.flags |= XE_VMA_PTE_1G;
2186                 break;
2187         case SZ_2M:
2188                 vma->gpuva.flags |= XE_VMA_PTE_2M;
2189                 break;
2190         }
2191
2192         return SZ_4K;
2193 }
2194
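/*
 * Commit an operation into the VM's GPUVA tree (insert/remove VMAs) and
 * record per-op COMMITTED flags so a failed bind series can be unwound later
 * by xe_vma_op_unwind().
 */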
2195 static int xe_vma_op_commit(struct xe_vm *vm, struct xe_vma_op *op)
2196 {
2197         int err = 0;
2198
2199         lockdep_assert_held_write(&vm->lock);
2200
2201         switch (op->base.op) {
2202         case DRM_GPUVA_OP_MAP:
2203                 err |= xe_vm_insert_vma(vm, op->map.vma);
2204                 if (!err)
2205                         op->flags |= XE_VMA_OP_COMMITTED;
2206                 break;
2207         case DRM_GPUVA_OP_REMAP:
2208         {
2209                 u8 tile_present =
2210                         gpuva_to_vma(op->base.remap.unmap->va)->tile_present;
2211
2212                 prep_vma_destroy(vm, gpuva_to_vma(op->base.remap.unmap->va),
2213                                  true);
2214                 op->flags |= XE_VMA_OP_COMMITTED;
2215
2216                 if (op->remap.prev) {
2217                         err |= xe_vm_insert_vma(vm, op->remap.prev);
2218                         if (!err)
2219                                 op->flags |= XE_VMA_OP_PREV_COMMITTED;
2220                         if (!err && op->remap.skip_prev) {
2221                                 op->remap.prev->tile_present =
2222                                         tile_present;
2223                                 op->remap.prev = NULL;
2224                         }
2225                 }
2226                 if (op->remap.next) {
2227                         err |= xe_vm_insert_vma(vm, op->remap.next);
2228                         if (!err)
2229                                 op->flags |= XE_VMA_OP_NEXT_COMMITTED;
2230                         if (!err && op->remap.skip_next) {
2231                                 op->remap.next->tile_present =
2232                                         tile_present;
2233                                 op->remap.next = NULL;
2234                         }
2235                 }
2236
2237                 /* Adjust for partial unbind after removing VMA from VM */
2238                 if (!err) {
2239                         op->base.remap.unmap->va->va.addr = op->remap.start;
2240                         op->base.remap.unmap->va->va.range = op->remap.range;
2241                 }
2242                 break;
2243         }
2244         case DRM_GPUVA_OP_UNMAP:
2245                 prep_vma_destroy(vm, gpuva_to_vma(op->base.unmap.va), true);
2246                 op->flags |= XE_VMA_OP_COMMITTED;
2247                 break;
2248         case DRM_GPUVA_OP_PREFETCH:
2249                 op->flags |= XE_VMA_OP_COMMITTED;
2250                 break;
2251         default:
2252                 drm_warn(&vm->xe->drm, "NOT POSSIBLE");
2253         }
2254
2255         return err;
2256 }
2257
2258
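/*
 * Parse a drm_gpuva_ops list into xe_vma_ops: allocate new VMAs for MAP and
 * the REMAP prev/next pieces, decide when a partial REMAP can skip rebinding,
 * attach syncs to the first/last op and commit each op so the whole series
 * can be unwound on error.
 */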
2259 static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct xe_exec_queue *q,
2260                                    struct drm_gpuva_ops *ops,
2261                                    struct xe_sync_entry *syncs, u32 num_syncs,
2262                                    struct list_head *ops_list, bool last)
2263 {
2264         struct xe_vma_op *last_op = NULL;
2265         struct drm_gpuva_op *__op;
2266         int err = 0;
2267
2268         lockdep_assert_held_write(&vm->lock);
2269
2270         drm_gpuva_for_each_op(__op, ops) {
2271                 struct xe_vma_op *op = gpuva_op_to_vma_op(__op);
2272                 struct xe_vma *vma;
2273                 bool first = list_empty(ops_list);
2274                 unsigned int flags = 0;
2275
2276                 INIT_LIST_HEAD(&op->link);
2277                 list_add_tail(&op->link, ops_list);
2278
2279                 if (first) {
2280                         op->flags |= XE_VMA_OP_FIRST;
2281                         op->num_syncs = num_syncs;
2282                         op->syncs = syncs;
2283                 }
2284
2285                 op->q = q;
2286
2287                 switch (op->base.op) {
2288                 case DRM_GPUVA_OP_MAP:
2289                 {
2290                         flags |= op->map.read_only ?
2291                                 VMA_CREATE_FLAG_READ_ONLY : 0;
2292                         flags |= op->map.is_null ?
2293                                 VMA_CREATE_FLAG_IS_NULL : 0;
2294
2295                         vma = new_vma(vm, &op->base.map, op->map.pat_index,
2296                                       flags);
2297                         if (IS_ERR(vma))
2298                                 return PTR_ERR(vma);
2299
2300                         op->map.vma = vma;
2301                         break;
2302                 }
2303                 case DRM_GPUVA_OP_REMAP:
2304                 {
2305                         struct xe_vma *old =
2306                                 gpuva_to_vma(op->base.remap.unmap->va);
2307
2308                         op->remap.start = xe_vma_start(old);
2309                         op->remap.range = xe_vma_size(old);
2310
2311                         if (op->base.remap.prev) {
2312                                 flags |= op->base.remap.unmap->va->flags &
2313                                         XE_VMA_READ_ONLY ?
2314                                         VMA_CREATE_FLAG_READ_ONLY : 0;
2315                                 flags |= op->base.remap.unmap->va->flags &
2316                                         DRM_GPUVA_SPARSE ?
2317                                         VMA_CREATE_FLAG_IS_NULL : 0;
2318
2319                                 vma = new_vma(vm, op->base.remap.prev,
2320                                               old->pat_index, flags);
2321                                 if (IS_ERR(vma))
2322                                         return PTR_ERR(vma);
2323
2324                                 op->remap.prev = vma;
2325
2326                                 /*
2327                                  * Userptr creates a new SG mapping so
2328                                  * we must also rebind.
2329                                  */
2330                                 op->remap.skip_prev = !xe_vma_is_userptr(old) &&
2331                                         IS_ALIGNED(xe_vma_end(vma),
2332                                                    xe_vma_max_pte_size(old));
2333                                 if (op->remap.skip_prev) {
2334                                         xe_vma_set_pte_size(vma, xe_vma_max_pte_size(old));
2335                                         op->remap.range -=
2336                                                 xe_vma_end(vma) -
2337                                                 xe_vma_start(old);
2338                                         op->remap.start = xe_vma_end(vma);
2339                                 }
2340                         }
2341
2342                         if (op->base.remap.next) {
2343                                 flags |= op->base.remap.unmap->va->flags &
2344                                         XE_VMA_READ_ONLY ?
2345                                         VMA_CREATE_FLAG_READ_ONLY : 0;
2346                                 flags |= op->base.remap.unmap->va->flags &
2347                                         DRM_GPUVA_SPARSE ?
2348                                         VMA_CREATE_FLAG_IS_NULL : 0;
2349
2350                                 vma = new_vma(vm, op->base.remap.next,
2351                                               old->pat_index, flags);
2352                                 if (IS_ERR(vma))
2353                                         return PTR_ERR(vma);
2354
2355                                 op->remap.next = vma;
2356
2357                                 /*
2358                                  * Userptr creates a new SG mapping so
2359                                  * we must also rebind.
2360                                  */
2361                                 op->remap.skip_next = !xe_vma_is_userptr(old) &&
2362                                         IS_ALIGNED(xe_vma_start(vma),
2363                                                    xe_vma_max_pte_size(old));
2364                                 if (op->remap.skip_next) {
2365                                         xe_vma_set_pte_size(vma, xe_vma_max_pte_size(old));
2366                                         op->remap.range -=
2367                                                 xe_vma_end(old) -
2368                                                 xe_vma_start(vma);
2369                                 }
2370                         }
2371                         break;
2372                 }
2373                 case DRM_GPUVA_OP_UNMAP:
2374                 case DRM_GPUVA_OP_PREFETCH:
2375                         /* Nothing to do */
2376                         break;
2377                 default:
2378                         drm_warn(&vm->xe->drm, "NOT POSSIBLE");
2379                 }
2380
2381                 last_op = op;
2382
2383                 err = xe_vma_op_commit(vm, op);
2384                 if (err)
2385                         return err;
2386         }
2387
2388         /* FIXME: Unhandled corner case */
2389         XE_WARN_ON(!last_op && last && !list_empty(ops_list));
2390
2391         if (!last_op)
2392                 return 0;
2393
2394         last_op->ops = ops;
2395         if (last) {
2396                 last_op->flags |= XE_VMA_OP_LAST;
2397                 last_op->num_syncs = num_syncs;
2398                 last_op->syncs = syncs;
2399         }
2400
2401         return 0;
2402 }
2403
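/*
 * Execute a single parsed op with the VM and the VMA's BO locked through the
 * caller's drm_exec context, dispatching to bind/unbind/prefetch as needed.
 * REMAP is handled as an unbind of the old VMA followed by rebinds of the
 * surviving prev/next pieces.
 */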
2404 static int op_execute(struct drm_exec *exec, struct xe_vm *vm,
2405                       struct xe_vma *vma, struct xe_vma_op *op)
2406 {
2407         int err;
2408
2409         lockdep_assert_held_write(&vm->lock);
2410
2411         err = xe_vm_prepare_vma(exec, vma, 1);
2412         if (err)
2413                 return err;
2414
2415         xe_vm_assert_held(vm);
2416         xe_bo_assert_held(xe_vma_bo(vma));
2417
2418         switch (op->base.op) {
2419         case DRM_GPUVA_OP_MAP:
2420                 err = xe_vm_bind(vm, vma, op->q, xe_vma_bo(vma),
2421                                  op->syncs, op->num_syncs,
2422                                  op->map.immediate || !xe_vm_in_fault_mode(vm),
2423                                  op->flags & XE_VMA_OP_FIRST,
2424                                  op->flags & XE_VMA_OP_LAST);
2425                 break;
2426         case DRM_GPUVA_OP_REMAP:
2427         {
2428                 bool prev = !!op->remap.prev;
2429                 bool next = !!op->remap.next;
2430
2431                 if (!op->remap.unmap_done) {
2432                         if (prev || next)
2433                                 vma->gpuva.flags |= XE_VMA_FIRST_REBIND;
2434                         err = xe_vm_unbind(vm, vma, op->q, op->syncs,
2435                                            op->num_syncs,
2436                                            op->flags & XE_VMA_OP_FIRST,
2437                                            op->flags & XE_VMA_OP_LAST &&
2438                                            !prev && !next);
2439                         if (err)
2440                                 break;
2441                         op->remap.unmap_done = true;
2442                 }
2443
2444                 if (prev) {
2445                         op->remap.prev->gpuva.flags |= XE_VMA_LAST_REBIND;
2446                         err = xe_vm_bind(vm, op->remap.prev, op->q,
2447                                          xe_vma_bo(op->remap.prev), op->syncs,
2448                                          op->num_syncs, true, false,
2449                                          op->flags & XE_VMA_OP_LAST && !next);
2450                         op->remap.prev->gpuva.flags &= ~XE_VMA_LAST_REBIND;
2451                         if (err)
2452                                 break;
2453                         op->remap.prev = NULL;
2454                 }
2455
2456                 if (next) {
2457                         op->remap.next->gpuva.flags |= XE_VMA_LAST_REBIND;
2458                         err = xe_vm_bind(vm, op->remap.next, op->q,
2459                                          xe_vma_bo(op->remap.next),
2460                                          op->syncs, op->num_syncs,
2461                                          true, false,
2462                                          op->flags & XE_VMA_OP_LAST);
2463                         op->remap.next->gpuva.flags &= ~XE_VMA_LAST_REBIND;
2464                         if (err)
2465                                 break;
2466                         op->remap.next = NULL;
2467                 }
2468
2469                 break;
2470         }
2471         case DRM_GPUVA_OP_UNMAP:
2472                 err = xe_vm_unbind(vm, vma, op->q, op->syncs,
2473                                    op->num_syncs, op->flags & XE_VMA_OP_FIRST,
2474                                    op->flags & XE_VMA_OP_LAST);
2475                 break;
2476         case DRM_GPUVA_OP_PREFETCH:
2477                 err = xe_vm_prefetch(vm, vma, op->q, op->prefetch.region,
2478                                      op->syncs, op->num_syncs,
2479                                      op->flags & XE_VMA_OP_FIRST,
2480                                      op->flags & XE_VMA_OP_LAST);
2481                 break;
2482         default:
2483                 drm_warn(&vm->xe->drm, "NOT POSSIBLE");
2484         }
2485
2486         if (err)
2487                 trace_xe_vma_fail(vma);
2488
2489         return err;
2490 }
2491
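/*
 * Lock the required objects with drm_exec and run the op, repinning userptr
 * pages and retrying from scratch when a userptr VMA was invalidated and the
 * op failed with -EAGAIN.
 */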
2492 static int __xe_vma_op_execute(struct xe_vm *vm, struct xe_vma *vma,
2493                                struct xe_vma_op *op)
2494 {
2495         struct drm_exec exec;
2496         int err;
2497
2498 retry_userptr:
2499         drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0);
2500         drm_exec_until_all_locked(&exec) {
2501                 err = op_execute(&exec, vm, vma, op);
2502                 drm_exec_retry_on_contention(&exec);
2503                 if (err)
2504                         break;
2505         }
2506         drm_exec_fini(&exec);
2507
2508         if (err == -EAGAIN && xe_vma_is_userptr(vma)) {
2509                 lockdep_assert_held_write(&vm->lock);
2510                 err = xe_vma_userptr_pin_pages(vma);
2511                 if (!err)
2512                         goto retry_userptr;
2513
2514                 trace_xe_vma_fail(vma);
2515         }
2516
2517         return err;
2518 }
2519
2520 static int xe_vma_op_execute(struct xe_vm *vm, struct xe_vma_op *op)
2521 {
2522         int ret = 0;
2523
2524         lockdep_assert_held_write(&vm->lock);
2525
2526 #ifdef TEST_VM_ASYNC_OPS_ERROR
2527         if (op->inject_error) {
2528                 op->inject_error = false;
2529                 return -ENOMEM;
2530         }
2531 #endif
2532
2533         switch (op->base.op) {
2534         case DRM_GPUVA_OP_MAP:
2535                 ret = __xe_vma_op_execute(vm, op->map.vma, op);
2536                 break;
2537         case DRM_GPUVA_OP_REMAP:
2538         {
2539                 struct xe_vma *vma;
2540
2541                 if (!op->remap.unmap_done)
2542                         vma = gpuva_to_vma(op->base.remap.unmap->va);
2543                 else if (op->remap.prev)
2544                         vma = op->remap.prev;
2545                 else
2546                         vma = op->remap.next;
2547
2548                 ret = __xe_vma_op_execute(vm, vma, op);
2549                 break;
2550         }
2551         case DRM_GPUVA_OP_UNMAP:
2552                 ret = __xe_vma_op_execute(vm, gpuva_to_vma(op->base.unmap.va),
2553                                           op);
2554                 break;
2555         case DRM_GPUVA_OP_PREFETCH:
2556                 ret = __xe_vma_op_execute(vm,
2557                                           gpuva_to_vma(op->base.prefetch.va),
2558                                           op);
2559                 break;
2560         default:
2561                 drm_warn(&vm->xe->drm, "NOT POSSIBLE");
2562         }
2563
2564         return ret;
2565 }
2566
2567 static void xe_vma_op_cleanup(struct xe_vm *vm, struct xe_vma_op *op)
2568 {
2569         bool last = op->flags & XE_VMA_OP_LAST;
2570
2571         if (last) {
2572                 while (op->num_syncs--)
2573                         xe_sync_entry_cleanup(&op->syncs[op->num_syncs]);
2574                 kfree(op->syncs);
2575                 if (op->q)
2576                         xe_exec_queue_put(op->q);
2577         }
2578         if (!list_empty(&op->link))
2579                 list_del(&op->link);
2580         if (op->ops)
2581                 drm_gpuva_ops_free(&vm->gpuvm, op->ops);
2582         if (last)
2583                 xe_vm_put(vm);
2584 }
2585
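/*
 * Undo the side effects of a (partially) committed op: destroy VMAs created
 * for MAP/REMAP and re-insert VMAs that were removed for UNMAP/REMAP,
 * clearing their DESTROYED flag under the userptr notifier lock.
 */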
2586 static void xe_vma_op_unwind(struct xe_vm *vm, struct xe_vma_op *op,
2587                              bool post_commit, bool prev_post_commit,
2588                              bool next_post_commit)
2589 {
2590         lockdep_assert_held_write(&vm->lock);
2591
2592         switch (op->base.op) {
2593         case DRM_GPUVA_OP_MAP:
2594                 if (op->map.vma) {
2595                         prep_vma_destroy(vm, op->map.vma, post_commit);
2596                         xe_vma_destroy_unlocked(op->map.vma);
2597                 }
2598                 break;
2599         case DRM_GPUVA_OP_UNMAP:
2600         {
2601                 struct xe_vma *vma = gpuva_to_vma(op->base.unmap.va);
2602
2603                 if (vma) {
2604                         down_read(&vm->userptr.notifier_lock);
2605                         vma->gpuva.flags &= ~XE_VMA_DESTROYED;
2606                         up_read(&vm->userptr.notifier_lock);
2607                         if (post_commit)
2608                                 xe_vm_insert_vma(vm, vma);
2609                 }
2610                 break;
2611         }
2612         case DRM_GPUVA_OP_REMAP:
2613         {
2614                 struct xe_vma *vma = gpuva_to_vma(op->base.remap.unmap->va);
2615
2616                 if (op->remap.prev) {
2617                         prep_vma_destroy(vm, op->remap.prev, prev_post_commit);
2618                         xe_vma_destroy_unlocked(op->remap.prev);
2619                 }
2620                 if (op->remap.next) {
2621                         prep_vma_destroy(vm, op->remap.next, next_post_commit);
2622                         xe_vma_destroy_unlocked(op->remap.next);
2623                 }
2624                 if (vma) {
2625                         down_read(&vm->userptr.notifier_lock);
2626                         vma->gpuva.flags &= ~XE_VMA_DESTROYED;
2627                         up_read(&vm->userptr.notifier_lock);
2628                         if (post_commit)
2629                                 xe_vm_insert_vma(vm, vma);
2630                 }
2631                 break;
2632         }
2633         case DRM_GPUVA_OP_PREFETCH:
2634                 /* Nothing to do */
2635                 break;
2636         default:
2637                 drm_warn(&vm->xe->drm, "NOT POSSIBLE");
2638         }
2639 }
2640
2641 static void vm_bind_ioctl_ops_unwind(struct xe_vm *vm,
2642                                      struct drm_gpuva_ops **ops,
2643                                      int num_ops_list)
2644 {
2645         int i;
2646
2647         for (i = num_ops_list - 1; i >= 0; --i) {
2648                 struct drm_gpuva_ops *__ops = ops[i];
2649                 struct drm_gpuva_op *__op;
2650
2651                 if (!__ops)
2652                         continue;
2653
2654                 drm_gpuva_for_each_op_reverse(__op, __ops) {
2655                         struct xe_vma_op *op = gpuva_op_to_vma_op(__op);
2656
2657                         xe_vma_op_unwind(vm, op,
2658                                          op->flags & XE_VMA_OP_COMMITTED,
2659                                          op->flags & XE_VMA_OP_PREV_COMMITTED,
2660                                          op->flags & XE_VMA_OP_NEXT_COMMITTED);
2661                 }
2662
2663                 drm_gpuva_ops_free(&vm->gpuvm, __ops);
2664         }
2665 }
2666
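/*
 * Execute all parsed ops in order. There is currently no fine-grained error
 * recovery here: any failure kills the VM (see the FIXME below) and returns
 * -ENOSPC to userspace.
 */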
2667 static int vm_bind_ioctl_ops_execute(struct xe_vm *vm,
2668                                      struct list_head *ops_list)
2669 {
2670         struct xe_vma_op *op, *next;
2671         int err;
2672
2673         lockdep_assert_held_write(&vm->lock);
2674
2675         list_for_each_entry_safe(op, next, ops_list, link) {
2676                 err = xe_vma_op_execute(vm, op);
2677                 if (err) {
2678                         drm_warn(&vm->xe->drm, "VM op(%d) failed with %d",
2679                                  op->base.op, err);
2680                         /*
2681                          * FIXME: Killing VM rather than proper error handling
2682                          */
2683                         xe_vm_kill(vm);
2684                         return -ENOSPC;
2685                 }
2686                 xe_vma_op_cleanup(vm, op);
2687         }
2688
2689         return 0;
2690 }
2691
2692 #ifdef TEST_VM_ASYNC_OPS_ERROR
2693 #define SUPPORTED_FLAGS \
2694         (FORCE_ASYNC_OP_ERROR | DRM_XE_VM_BIND_FLAG_READONLY | \
2695          DRM_XE_VM_BIND_FLAG_IMMEDIATE | DRM_XE_VM_BIND_FLAG_NULL | 0xffff)
2696 #else
2697 #define SUPPORTED_FLAGS \
2698         (DRM_XE_VM_BIND_FLAG_READONLY | \
2699          DRM_XE_VM_BIND_FLAG_IMMEDIATE | DRM_XE_VM_BIND_FLAG_NULL | \
2700          0xffff)
2701 #endif
2702 #define XE_64K_PAGE_MASK 0xffffull
2703 #define ALL_DRM_XE_SYNCS_FLAGS (DRM_XE_SYNCS_FLAG_WAIT_FOR_OP)
2704
2705 #define MAX_BINDS       512     /* FIXME: Picking random upper limit */
2706
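/*
 * Validate the bind ioctl arguments and, for multi-bind requests, copy the
 * array of bind ops from userspace. Each op's PAT index, flags, operation and
 * alignment are sanity checked before any VM state is touched.
 */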
2707 static int vm_bind_ioctl_check_args(struct xe_device *xe,
2708                                     struct drm_xe_vm_bind *args,
2709                                     struct drm_xe_vm_bind_op **bind_ops)
2710 {
2711         int err;
2712         int i;
2713
2714         if (XE_IOCTL_DBG(xe, args->pad || args->pad2) ||
2715             XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
2716                 return -EINVAL;
2717
2718         if (XE_IOCTL_DBG(xe, args->extensions) ||
2719             XE_IOCTL_DBG(xe, args->num_binds > MAX_BINDS))
2720                 return -EINVAL;
2721
2722         if (args->num_binds > 1) {
2723                 u64 __user *bind_user =
2724                         u64_to_user_ptr(args->vector_of_binds);
2725
2726                 *bind_ops = kmalloc(sizeof(struct drm_xe_vm_bind_op) *
2727                                     args->num_binds, GFP_KERNEL);
2728                 if (!*bind_ops)
2729                         return -ENOMEM;
2730
2731                 err = __copy_from_user(*bind_ops, bind_user,
2732                                        sizeof(struct drm_xe_vm_bind_op) *
2733                                        args->num_binds);
2734                 if (XE_IOCTL_DBG(xe, err)) {
2735                         err = -EFAULT;
2736                         goto free_bind_ops;
2737                 }
2738         } else {
2739                 *bind_ops = &args->bind;
2740         }
2741
2742         for (i = 0; i < args->num_binds; ++i) {
2743                 u64 range = (*bind_ops)[i].range;
2744                 u64 addr = (*bind_ops)[i].addr;
2745                 u32 op = (*bind_ops)[i].op;
2746                 u32 flags = (*bind_ops)[i].flags;
2747                 u32 obj = (*bind_ops)[i].obj;
2748                 u64 obj_offset = (*bind_ops)[i].obj_offset;
2749                 u32 prefetch_region = (*bind_ops)[i].prefetch_mem_region_instance;
2750                 bool is_null = flags & DRM_XE_VM_BIND_FLAG_NULL;
2751                 u16 pat_index = (*bind_ops)[i].pat_index;
2752                 u16 coh_mode;
2753
2754                 if (XE_IOCTL_DBG(xe, pat_index >= xe->pat.n_entries)) {
2755                         err = -EINVAL;
2756                         goto free_bind_ops;
2757                 }
2758
2759                 pat_index = array_index_nospec(pat_index, xe->pat.n_entries);
2760                 (*bind_ops)[i].pat_index = pat_index;
2761                 coh_mode = xe_pat_index_get_coh_mode(xe, pat_index);
2762                 if (XE_IOCTL_DBG(xe, !coh_mode)) { /* hw reserved */
2763                         err = -EINVAL;
2764                         goto free_bind_ops;
2765                 }
2766
2767                 if (XE_WARN_ON(coh_mode > XE_COH_AT_LEAST_1WAY)) {
2768                         err = -EINVAL;
2769                         goto free_bind_ops;
2770                 }
2771
2772                 if (XE_IOCTL_DBG(xe, op > DRM_XE_VM_BIND_OP_PREFETCH) ||
2773                     XE_IOCTL_DBG(xe, flags & ~SUPPORTED_FLAGS) ||
2774                     XE_IOCTL_DBG(xe, obj && is_null) ||
2775                     XE_IOCTL_DBG(xe, obj_offset && is_null) ||
2776                     XE_IOCTL_DBG(xe, op != DRM_XE_VM_BIND_OP_MAP &&
2777                                  is_null) ||
2778                     XE_IOCTL_DBG(xe, !obj &&
2779                                  op == DRM_XE_VM_BIND_OP_MAP &&
2780                                  !is_null) ||
2781                     XE_IOCTL_DBG(xe, !obj &&
2782                                  op == DRM_XE_VM_BIND_OP_UNMAP_ALL) ||
2783                     XE_IOCTL_DBG(xe, addr &&
2784                                  op == DRM_XE_VM_BIND_OP_UNMAP_ALL) ||
2785                     XE_IOCTL_DBG(xe, range &&
2786                                  op == DRM_XE_VM_BIND_OP_UNMAP_ALL) ||
2787                     XE_IOCTL_DBG(xe, obj &&
2788                                  op == DRM_XE_VM_BIND_OP_MAP_USERPTR) ||
2789                     XE_IOCTL_DBG(xe, coh_mode == XE_COH_NONE &&
2790                                  op == DRM_XE_VM_BIND_OP_MAP_USERPTR) ||
2791                     XE_IOCTL_DBG(xe, obj &&
2792                                  op == DRM_XE_VM_BIND_OP_PREFETCH) ||
2793                     XE_IOCTL_DBG(xe, prefetch_region &&
2794                                  op != DRM_XE_VM_BIND_OP_PREFETCH) ||
2795                     XE_IOCTL_DBG(xe, !(BIT(prefetch_region) &
2796                                        xe->info.mem_region_mask)) ||
2797                     XE_IOCTL_DBG(xe, obj &&
2798                                  op == DRM_XE_VM_BIND_OP_UNMAP)) {
2799                         err = -EINVAL;
2800                         goto free_bind_ops;
2801                 }
2802
2803                 if (XE_IOCTL_DBG(xe, obj_offset & ~PAGE_MASK) ||
2804                     XE_IOCTL_DBG(xe, addr & ~PAGE_MASK) ||
2805                     XE_IOCTL_DBG(xe, range & ~PAGE_MASK) ||
2806                     XE_IOCTL_DBG(xe, !range &&
2807                                  op != DRM_XE_VM_BIND_OP_UNMAP_ALL)) {
2808                         err = -EINVAL;
2809                         goto free_bind_ops;
2810                 }
2811         }
2812
2813         return 0;
2814
2815 free_bind_ops:
2816         if (args->num_binds > 1)
2817                 kfree(*bind_ops);
2818         return err;
2819 }
2820
2821 static int vm_bind_ioctl_signal_fences(struct xe_vm *vm,
2822                                        struct xe_exec_queue *q,
2823                                        struct xe_sync_entry *syncs,
2824                                        int num_syncs)
2825 {
2826         struct dma_fence *fence;
2827         int i, err = 0;
2828
2829         fence = xe_sync_in_fence_get(syncs, num_syncs,
2830                                      to_wait_exec_queue(vm, q), vm);
2831         if (IS_ERR(fence))
2832                 return PTR_ERR(fence);
2833
2834         for (i = 0; i < num_syncs; i++)
2835                 xe_sync_entry_signal(&syncs[i], NULL, fence);
2836
2837         xe_exec_queue_last_fence_set(to_wait_exec_queue(vm, q), vm,
2838                                      fence);
2839         dma_fence_put(fence);
2840
2841         return err;
2842 }
2843
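/*
 * VM bind ioctl: look up the exec queue and VM, validate every bind op
 * against the VM and BO sizes, build and parse the GPUVA operation lists and
 * finally execute them, signalling the supplied syncs.
 *
 * Minimal userspace usage sketch (illustrative only; vm_id, bo_handle, size,
 * gpu_addr and pat_index stand in for values obtained elsewhere, and error
 * handling is elided):
 *
 *	struct drm_xe_vm_bind bind = {
 *		.vm_id = vm_id,
 *		.num_binds = 1,
 *		.bind = {
 *			.obj = bo_handle,
 *			.obj_offset = 0,
 *			.range = size,
 *			.addr = gpu_addr,
 *			.op = DRM_XE_VM_BIND_OP_MAP,
 *			.pat_index = pat_index,
 *		},
 *	};
 *
 *	ioctl(fd, DRM_IOCTL_XE_VM_BIND, &bind);
 *
 * addr and range must be page aligned (64K aligned for BOs using 64K pages).
 */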
2844 int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
2845 {
2846         struct xe_device *xe = to_xe_device(dev);
2847         struct xe_file *xef = to_xe_file(file);
2848         struct drm_xe_vm_bind *args = data;
2849         struct drm_xe_sync __user *syncs_user;
2850         struct xe_bo **bos = NULL;
2851         struct drm_gpuva_ops **ops = NULL;
2852         struct xe_vm *vm;
2853         struct xe_exec_queue *q = NULL;
2854         u32 num_syncs;
2855         struct xe_sync_entry *syncs = NULL;
2856         struct drm_xe_vm_bind_op *bind_ops;
2857         LIST_HEAD(ops_list);
2858         int err;
2859         int i;
2860
2861         err = vm_bind_ioctl_check_args(xe, args, &bind_ops);
2862         if (err)
2863                 return err;
2864
2865         if (args->exec_queue_id) {
2866                 q = xe_exec_queue_lookup(xef, args->exec_queue_id);
2867                 if (XE_IOCTL_DBG(xe, !q)) {
2868                         err = -ENOENT;
2869                         goto free_objs;
2870                 }
2871
2872                 if (XE_IOCTL_DBG(xe, !(q->flags & EXEC_QUEUE_FLAG_VM))) {
2873                         err = -EINVAL;
2874                         goto put_exec_queue;
2875                 }
2876         }
2877
2878         vm = xe_vm_lookup(xef, args->vm_id);
2879         if (XE_IOCTL_DBG(xe, !vm)) {
2880                 err = -EINVAL;
2881                 goto put_exec_queue;
2882         }
2883
2884         err = down_write_killable(&vm->lock);
2885         if (err)
2886                 goto put_vm;
2887
2888         if (XE_IOCTL_DBG(xe, xe_vm_is_closed_or_banned(vm))) {
2889                 err = -ENOENT;
2890                 goto release_vm_lock;
2891         }
2892
2893         for (i = 0; i < args->num_binds; ++i) {
2894                 u64 range = bind_ops[i].range;
2895                 u64 addr = bind_ops[i].addr;
2896
2897                 if (XE_IOCTL_DBG(xe, range > vm->size) ||
2898                     XE_IOCTL_DBG(xe, addr > vm->size - range)) {
2899                         err = -EINVAL;
2900                         goto release_vm_lock;
2901                 }
2902         }
2903
2904         if (args->num_binds) {
2905                 bos = kcalloc(args->num_binds, sizeof(*bos), GFP_KERNEL);
2906                 if (!bos) {
2907                         err = -ENOMEM;
2908                         goto release_vm_lock;
2909                 }
2910
2911                 ops = kcalloc(args->num_binds, sizeof(*ops), GFP_KERNEL);
2912                 if (!ops) {
2913                         err = -ENOMEM;
2914                         goto release_vm_lock;
2915                 }
2916         }
2917
2918         for (i = 0; i < args->num_binds; ++i) {
2919                 struct drm_gem_object *gem_obj;
2920                 u64 range = bind_ops[i].range;
2921                 u64 addr = bind_ops[i].addr;
2922                 u32 obj = bind_ops[i].obj;
2923                 u64 obj_offset = bind_ops[i].obj_offset;
2924                 u16 pat_index = bind_ops[i].pat_index;
2925                 u16 coh_mode;
2926
2927                 if (!obj)
2928                         continue;
2929
2930                 gem_obj = drm_gem_object_lookup(file, obj);
2931                 if (XE_IOCTL_DBG(xe, !gem_obj)) {
2932                         err = -ENOENT;
2933                         goto put_obj;
2934                 }
2935                 bos[i] = gem_to_xe_bo(gem_obj);
2936
2937                 if (XE_IOCTL_DBG(xe, range > bos[i]->size) ||
2938                     XE_IOCTL_DBG(xe, obj_offset >
2939                                  bos[i]->size - range)) {
2940                         err = -EINVAL;
2941                         goto put_obj;
2942                 }
2943
2944                 if (bos[i]->flags & XE_BO_INTERNAL_64K) {
2945                         if (XE_IOCTL_DBG(xe, obj_offset &
2946                                          XE_64K_PAGE_MASK) ||
2947                             XE_IOCTL_DBG(xe, addr & XE_64K_PAGE_MASK) ||
2948                             XE_IOCTL_DBG(xe, range & XE_64K_PAGE_MASK)) {
2949                                 err = -EINVAL;
2950                                 goto put_obj;
2951                         }
2952                 }
2953
2954                 coh_mode = xe_pat_index_get_coh_mode(xe, pat_index);
2955                 if (bos[i]->cpu_caching) {
2956                         if (XE_IOCTL_DBG(xe, coh_mode == XE_COH_NONE &&
2957                                          bos[i]->cpu_caching == DRM_XE_GEM_CPU_CACHING_WB)) {
2958                                 err = -EINVAL;
2959                                 goto put_obj;
2960                         }
2961                 } else if (XE_IOCTL_DBG(xe, coh_mode == XE_COH_NONE)) {
2962                         /*
2963                          * Imported dma-buf from a different device should
2964                          * require 1way or 2way coherency since we don't know
2965                          * how it was mapped on the CPU. Just assume it is
2966                          * potentially cached on the CPU side.
2967                          */
2968                         err = -EINVAL;
2969                         goto put_obj;
2970                 }
2971         }
2972
2973         if (args->num_syncs) {
2974                 syncs = kcalloc(args->num_syncs, sizeof(*syncs), GFP_KERNEL);
2975                 if (!syncs) {
2976                         err = -ENOMEM;
2977                         goto put_obj;
2978                 }
2979         }
2980
2981         syncs_user = u64_to_user_ptr(args->syncs);
2982         for (num_syncs = 0; num_syncs < args->num_syncs; num_syncs++) {
2983                 err = xe_sync_entry_parse(xe, xef, &syncs[num_syncs],
2984                                           &syncs_user[num_syncs],
2985                                           (xe_vm_in_lr_mode(vm) ?
2986                                            SYNC_PARSE_FLAG_LR_MODE : 0) |
2987                                           (!args->num_binds ?
2988                                            SYNC_PARSE_FLAG_DISALLOW_USER_FENCE : 0));
2989                 if (err)
2990                         goto free_syncs;
2991         }
2992
2993         if (!args->num_binds) {
2994                 err = -ENODATA;
2995                 goto free_syncs;
2996         }
2997
2998         for (i = 0; i < args->num_binds; ++i) {
2999                 u64 range = bind_ops[i].range;
3000                 u64 addr = bind_ops[i].addr;
3001                 u32 op = bind_ops[i].op;
3002                 u32 flags = bind_ops[i].flags;
3003                 u64 obj_offset = bind_ops[i].obj_offset;
3004                 u32 prefetch_region = bind_ops[i].prefetch_mem_region_instance;
3005                 u16 pat_index = bind_ops[i].pat_index;
3006
3007                 ops[i] = vm_bind_ioctl_ops_create(vm, bos[i], obj_offset,
3008                                                   addr, range, op, flags,
3009                                                   prefetch_region, pat_index);
3010                 if (IS_ERR(ops[i])) {
3011                         err = PTR_ERR(ops[i]);
3012                         ops[i] = NULL;
3013                         goto unwind_ops;
3014                 }
3015
3016                 err = vm_bind_ioctl_ops_parse(vm, q, ops[i], syncs, num_syncs,
3017                                               &ops_list,
3018                                               i == args->num_binds - 1);
3019                 if (err)
3020                         goto unwind_ops;
3021         }
3022
3023         /* Nothing to do */
3024         if (list_empty(&ops_list)) {
3025                 err = -ENODATA;
3026                 goto unwind_ops;
3027         }
3028
3029         xe_vm_get(vm);
3030         if (q)
3031                 xe_exec_queue_get(q);
3032
3033         err = vm_bind_ioctl_ops_execute(vm, &ops_list);
3034
3035         up_write(&vm->lock);
3036
3037         if (q)
3038                 xe_exec_queue_put(q);
3039         xe_vm_put(vm);
3040
3041         for (i = 0; bos && i < args->num_binds; ++i)
3042                 xe_bo_put(bos[i]);
3043
3044         kfree(bos);
3045         kfree(ops);
3046         if (args->num_binds > 1)
3047                 kfree(bind_ops);
3048
3049         return err;
3050
3051 unwind_ops:
3052         vm_bind_ioctl_ops_unwind(vm, ops, args->num_binds);
3053 free_syncs:
3054         if (err == -ENODATA)
3055                 err = vm_bind_ioctl_signal_fences(vm, q, syncs, num_syncs);
3056         while (num_syncs--)
3057                 xe_sync_entry_cleanup(&syncs[num_syncs]);
3058
3059         kfree(syncs);
3060 put_obj:
3061         for (i = 0; i < args->num_binds; ++i)
3062                 xe_bo_put(bos[i]);
3063 release_vm_lock:
3064         up_write(&vm->lock);
3065 put_vm:
3066         xe_vm_put(vm);
3067 put_exec_queue:
3068         if (q)
3069                 xe_exec_queue_put(q);
3070 free_objs:
3071         kfree(bos);
3072         kfree(ops);
3073         if (args->num_binds > 1)
3074                 kfree(bind_ops);
3075         return err;
3076 }
3077
3078 /**
3079  * xe_vm_lock() - Lock the vm's dma_resv object
3080  * @vm: The struct xe_vm whose lock is to be locked
3081  * @intr: Whether any wait should be interruptible
3082  *
3083  * Return: 0 on success, -EINTR if @intr is true and the wait for a
3084  * contended lock was interrupted. If @intr is false, the function
3085  * always returns 0.
3086  */
3087 int xe_vm_lock(struct xe_vm *vm, bool intr)
3088 {
3089         if (intr)
3090                 return dma_resv_lock_interruptible(xe_vm_resv(vm), NULL);
3091
3092         return dma_resv_lock(xe_vm_resv(vm), NULL);
3093 }
3094
3095 /**
3096  * xe_vm_unlock() - Unlock the vm's dma_resv object
3097  * @vm: The struct xe_vm whose lock is to be released.
3098  *
3099  * Unlock the vm's dma_resv object that was locked by xe_vm_lock().
3100  */
3101 void xe_vm_unlock(struct xe_vm *vm)
3102 {
3103         dma_resv_unlock(xe_vm_resv(vm));
3104 }
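/*
 * Minimal usage sketch for the pair above (an assumption, not a verbatim
 * in-tree caller): take the vm's dma_resv interruptibly, do the work that
 * needs it, then drop the lock. With intr == true the only failure is -EINTR:
 *
 *	err = xe_vm_lock(vm, true);
 *	if (err)
 *		return err;
 *
 *	... access state protected by xe_vm_resv(vm) ...
 *
 *	xe_vm_unlock(vm);
 */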
3105
3106 /**
3107  * xe_vm_invalidate_vma() - invalidate GPU mappings for a VMA without a lock
3108  * @vma: VMA to invalidate
3109  *
3110  * Walks the list of page table leaves, memsetting the entries owned by this
3111  * VMA to zero, invalidates the TLBs, and blocks until the TLB invalidation is
3112  * complete.
3113  *
3114  * Return: 0 on success, negative error code otherwise.
3115  */
3116 int xe_vm_invalidate_vma(struct xe_vma *vma)
3117 {
3118         struct xe_device *xe = xe_vma_vm(vma)->xe;
3119         struct xe_tile *tile;
3120         u32 tile_needs_invalidate = 0;
3121         int seqno[XE_MAX_TILES_PER_DEVICE];
3122         u8 id;
3123         int ret;
3124
3125         xe_assert(xe, xe_vm_in_fault_mode(xe_vma_vm(vma)));
3126         xe_assert(xe, !xe_vma_is_null(vma));
3127         trace_xe_vma_usm_invalidate(vma);
3128
3129         /* Check that we don't race with page-table updates */
3130         if (IS_ENABLED(CONFIG_PROVE_LOCKING)) {
3131                 if (xe_vma_is_userptr(vma)) {
3132                         WARN_ON_ONCE(!mmu_interval_check_retry
3133                                      (&vma->userptr.notifier,
3134                                       vma->userptr.notifier_seq));
3135                         WARN_ON_ONCE(!dma_resv_test_signaled(xe_vm_resv(xe_vma_vm(vma)),
3136                                                              DMA_RESV_USAGE_BOOKKEEP));
3137
3138                 } else {
3139                         xe_bo_assert_held(xe_vma_bo(vma));
3140                 }
3141         }
3142
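        /* Zap this VMA's PTEs on each tile and kick off a TLB invalidation */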
3143         for_each_tile(tile, xe, id) {
3144                 if (xe_pt_zap_ptes(tile, vma)) {
3145                         tile_needs_invalidate |= BIT(id);
3146                         xe_device_wmb(xe);
3147                         /*
3148                          * FIXME: We potentially need to invalidate multiple
3149                          * GTs within the tile
3150                          */
3151                         seqno[id] = xe_gt_tlb_invalidation_vma(tile->primary_gt, NULL, vma);
3152                         if (seqno[id] < 0)
3153                                 return seqno[id];
3154                 }
3155         }
3156
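        /* Wait for every TLB invalidation issued above to complete */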
3157         for_each_tile(tile, xe, id) {
3158                 if (tile_needs_invalidate & BIT(id)) {
3159                         ret = xe_gt_tlb_invalidation_wait(tile->primary_gt, seqno[id]);
3160                         if (ret < 0)
3161                                 return ret;
3162                 }
3163         }
3164
3165         vma->usm.tile_invalidated = vma->tile_mask;
3166
3167         return 0;
3168 }
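/*
 * Illustrative caller sketch (an assumption, not a verbatim in-tree caller):
 * in fault mode, once a userptr range has gone invalid and the locking
 * requirements checked above hold, the stale GPU mappings can be torn down
 * with:
 *
 *	err = xe_vm_invalidate_vma(vma);
 *	if (err)
 *		drm_warn(&xe_vma_vm(vma)->xe->drm,
 *			 "VMA TLB invalidation failed: %d\n", err);
 */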
3169
3170 int xe_analyze_vm(struct drm_printer *p, struct xe_vm *vm, int gt_id)
3171 {
3172         struct drm_gpuva *gpuva;
3173         bool is_vram;
3174         u64 addr;
3175
3176         if (!down_read_trylock(&vm->lock)) {
3177                 drm_printf(p, " Failed to acquire VM lock to dump capture\n");
3178                 return 0;
3179         }
3180         if (vm->pt_root[gt_id]) {
3181                 addr = xe_bo_addr(vm->pt_root[gt_id]->bo, 0, XE_PAGE_SIZE);
3182                 is_vram = xe_bo_is_vram(vm->pt_root[gt_id]->bo);
3183                 drm_printf(p, " VM root: A:0x%llx %s\n", addr,
3184                            is_vram ? "VRAM" : "SYS");
3185         }
3186
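        /* Dump each VMA: address range, size, first backing address and placement */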
3187         drm_gpuvm_for_each_va(gpuva, &vm->gpuvm) {
3188                 struct xe_vma *vma = gpuva_to_vma(gpuva);
3189                 bool is_userptr = xe_vma_is_userptr(vma);
3190                 bool is_null = xe_vma_is_null(vma);
3191
3192                 if (is_null) {
3193                         addr = 0;
3194                 } else if (is_userptr) {
3195                         struct xe_res_cursor cur;
3196
3197                         if (vma->userptr.sg) {
3198                                 xe_res_first_sg(vma->userptr.sg, 0, XE_PAGE_SIZE,
3199                                                 &cur);
3200                                 addr = xe_res_dma(&cur);
3201                         } else {
3202                                 addr = 0;
3203                         }
3204                 } else {
3205                         addr = __xe_bo_addr(xe_vma_bo(vma), 0, XE_PAGE_SIZE);
3206                         is_vram = xe_bo_is_vram(xe_vma_bo(vma));
3207                 }
3208                 drm_printf(p, " [%016llx-%016llx] S:0x%016llx A:%016llx %s\n",
3209                            xe_vma_start(vma), xe_vma_end(vma) - 1,
3210                            xe_vma_size(vma),
3211                            addr, is_null ? "NULL" : is_userptr ? "USR" :
3212                            is_vram ? "VRAM" : "SYS");
3213         }
3214         up_read(&vm->lock);
3215
3216         return 0;
3217 }