drm/xe: Rework rebinding
drivers/gpu/drm/xe/xe_vm.c
1 // SPDX-License-Identifier: MIT
2 /*
3  * Copyright © 2021 Intel Corporation
4  */
5
6 #include "xe_vm.h"
7
8 #include <linux/dma-fence-array.h>
9 #include <linux/nospec.h>
10
11 #include <drm/drm_exec.h>
12 #include <drm/drm_print.h>
13 #include <drm/ttm/ttm_execbuf_util.h>
14 #include <drm/ttm/ttm_tt.h>
15 #include <drm/xe_drm.h>
16 #include <linux/ascii85.h>
17 #include <linux/delay.h>
18 #include <linux/kthread.h>
19 #include <linux/mm.h>
20 #include <linux/swap.h>
21
22 #include <generated/xe_wa_oob.h>
23
24 #include "xe_assert.h"
25 #include "xe_bo.h"
26 #include "xe_device.h"
27 #include "xe_drm_client.h"
28 #include "xe_exec_queue.h"
29 #include "xe_gt.h"
30 #include "xe_gt_pagefault.h"
31 #include "xe_gt_tlb_invalidation.h"
32 #include "xe_migrate.h"
33 #include "xe_pat.h"
34 #include "xe_pm.h"
35 #include "xe_preempt_fence.h"
36 #include "xe_pt.h"
37 #include "xe_res_cursor.h"
38 #include "xe_sync.h"
39 #include "xe_trace.h"
40 #include "xe_wa.h"
41
42 static struct drm_gem_object *xe_vm_obj(struct xe_vm *vm)
43 {
44         return vm->gpuvm.r_obj;
45 }
46
47 /**
48  * xe_vma_userptr_check_repin() - Advisory check for repin needed
49  * @uvma: The userptr vma
50  *
51  * Check if the userptr vma has been invalidated since last successful
52  * repin. The check is advisory only and the function can be called
53  * without the vm->userptr.notifier_lock held. There is no guarantee that the
54  * vma userptr will remain valid after a lockless check, so typically
55  * the call needs to be followed by a proper check under the notifier_lock.
56  *
57  * Return: 0 if userptr vma is valid, -EAGAIN otherwise; repin recommended.
58  */
59 int xe_vma_userptr_check_repin(struct xe_userptr_vma *uvma)
60 {
61         return mmu_interval_check_retry(&uvma->userptr.notifier,
62                                         uvma->userptr.notifier_seq) ?
63                 -EAGAIN : 0;
64 }
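/*
 * Hypothetical usage sketch (not part of the driver): callers typically pair
 * the lockless advisory check above with a repin attempt and a final
 * authoritative check under the notifier lock, roughly like:
 *
 *	if (xe_vma_userptr_check_repin(uvma) == -EAGAIN)
 *		err = xe_vma_userptr_pin_pages(uvma);
 *	...
 *	down_read(&vm->userptr.notifier_lock);
 *	err = __xe_vm_userptr_needs_repin(vm);
 *	up_read(&vm->userptr.notifier_lock);
 */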
65
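/*
 * Pin the backing pages of a userptr VMA and rebuild its sg table.
 * Paraphrased from the code below: tear down any stale dma mapping, re-pin
 * the pages with get_user_pages_fast() (borrowing the notifier's mm when
 * running from a kthread), dma-map them and record the new notifier
 * sequence number; if the range was invalidated again in the meantime,
 * retry the whole sequence.
 */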
66 int xe_vma_userptr_pin_pages(struct xe_userptr_vma *uvma)
67 {
68         struct xe_userptr *userptr = &uvma->userptr;
69         struct xe_vma *vma = &uvma->vma;
70         struct xe_vm *vm = xe_vma_vm(vma);
71         struct xe_device *xe = vm->xe;
72         const unsigned long num_pages = xe_vma_size(vma) >> PAGE_SHIFT;
73         struct page **pages;
74         bool in_kthread = !current->mm;
75         unsigned long notifier_seq;
76         int pinned, ret, i;
77         bool read_only = xe_vma_read_only(vma);
78
79         lockdep_assert_held(&vm->lock);
80         xe_assert(xe, xe_vma_is_userptr(vma));
81 retry:
82         if (vma->gpuva.flags & XE_VMA_DESTROYED)
83                 return 0;
84
85         notifier_seq = mmu_interval_read_begin(&userptr->notifier);
86         if (notifier_seq == userptr->notifier_seq)
87                 return 0;
88
89         pages = kvmalloc_array(num_pages, sizeof(*pages), GFP_KERNEL);
90         if (!pages)
91                 return -ENOMEM;
92
93         if (userptr->sg) {
94                 dma_unmap_sgtable(xe->drm.dev,
95                                   userptr->sg,
96                                   read_only ? DMA_TO_DEVICE :
97                                   DMA_BIDIRECTIONAL, 0);
98                 sg_free_table(userptr->sg);
99                 userptr->sg = NULL;
100         }
101
102         pinned = ret = 0;
103         if (in_kthread) {
104                 if (!mmget_not_zero(userptr->notifier.mm)) {
105                         ret = -EFAULT;
106                         goto mm_closed;
107                 }
108                 kthread_use_mm(userptr->notifier.mm);
109         }
110
111         while (pinned < num_pages) {
112                 ret = get_user_pages_fast(xe_vma_userptr(vma) +
113                                           pinned * PAGE_SIZE,
114                                           num_pages - pinned,
115                                           read_only ? 0 : FOLL_WRITE,
116                                           &pages[pinned]);
117                 if (ret < 0)
118                         break;
119
120                 pinned += ret;
121                 ret = 0;
122         }
123
124         if (in_kthread) {
125                 kthread_unuse_mm(userptr->notifier.mm);
126                 mmput(userptr->notifier.mm);
127         }
128 mm_closed:
129         if (ret)
130                 goto out;
131
132         ret = sg_alloc_table_from_pages_segment(&userptr->sgt, pages,
133                                                 pinned, 0,
134                                                 (u64)pinned << PAGE_SHIFT,
135                                                 xe_sg_segment_size(xe->drm.dev),
136                                                 GFP_KERNEL);
137         if (ret) {
138                 userptr->sg = NULL;
139                 goto out;
140         }
141         userptr->sg = &userptr->sgt;
142
143         ret = dma_map_sgtable(xe->drm.dev, userptr->sg,
144                               read_only ? DMA_TO_DEVICE :
145                               DMA_BIDIRECTIONAL,
146                               DMA_ATTR_SKIP_CPU_SYNC |
147                               DMA_ATTR_NO_KERNEL_MAPPING);
148         if (ret) {
149                 sg_free_table(userptr->sg);
150                 userptr->sg = NULL;
151                 goto out;
152         }
153
154         for (i = 0; i < pinned; ++i) {
155                 if (!read_only) {
156                         lock_page(pages[i]);
157                         set_page_dirty(pages[i]);
158                         unlock_page(pages[i]);
159                 }
160
161                 mark_page_accessed(pages[i]);
162         }
163
164 out:
165         release_pages(pages, pinned);
166         kvfree(pages);
167
168         if (!(ret < 0)) {
169                 userptr->notifier_seq = notifier_seq;
170                 if (xe_vma_userptr_check_repin(uvma) == -EAGAIN)
171                         goto retry;
172         }
173
174         return ret < 0 ? ret : 0;
175 }
176
177 static bool preempt_fences_waiting(struct xe_vm *vm)
178 {
179         struct xe_exec_queue *q;
180
181         lockdep_assert_held(&vm->lock);
182         xe_vm_assert_held(vm);
183
184         list_for_each_entry(q, &vm->preempt.exec_queues, compute.link) {
185                 if (!q->compute.pfence ||
186                     (q->compute.pfence && test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT,
187                                                    &q->compute.pfence->flags))) {
188                         return true;
189                 }
190         }
191
192         return false;
193 }
194
195 static void free_preempt_fences(struct list_head *list)
196 {
197         struct list_head *link, *next;
198
199         list_for_each_safe(link, next, list)
200                 xe_preempt_fence_free(to_preempt_fence_from_link(link));
201 }
202
203 static int alloc_preempt_fences(struct xe_vm *vm, struct list_head *list,
204                                 unsigned int *count)
205 {
206         lockdep_assert_held(&vm->lock);
207         xe_vm_assert_held(vm);
208
209         if (*count >= vm->preempt.num_exec_queues)
210                 return 0;
211
212         for (; *count < vm->preempt.num_exec_queues; ++(*count)) {
213                 struct xe_preempt_fence *pfence = xe_preempt_fence_alloc();
214
215                 if (IS_ERR(pfence))
216                         return PTR_ERR(pfence);
217
218                 list_move_tail(xe_preempt_fence_link(pfence), list);
219         }
220
221         return 0;
222 }
223
224 static int wait_for_existing_preempt_fences(struct xe_vm *vm)
225 {
226         struct xe_exec_queue *q;
227
228         xe_vm_assert_held(vm);
229
230         list_for_each_entry(q, &vm->preempt.exec_queues, compute.link) {
231                 if (q->compute.pfence) {
232                         long timeout = dma_fence_wait(q->compute.pfence, false);
233
234                         if (timeout < 0)
235                                 return -ETIME;
236                         dma_fence_put(q->compute.pfence);
237                         q->compute.pfence = NULL;
238                 }
239         }
240
241         return 0;
242 }
243
244 static bool xe_vm_is_idle(struct xe_vm *vm)
245 {
246         struct xe_exec_queue *q;
247
248         xe_vm_assert_held(vm);
249         list_for_each_entry(q, &vm->preempt.exec_queues, compute.link) {
250                 if (!xe_exec_queue_is_idle(q))
251                         return false;
252         }
253
254         return true;
255 }
256
257 static void arm_preempt_fences(struct xe_vm *vm, struct list_head *list)
258 {
259         struct list_head *link;
260         struct xe_exec_queue *q;
261
262         list_for_each_entry(q, &vm->preempt.exec_queues, compute.link) {
263                 struct dma_fence *fence;
264
265                 link = list->next;
266                 xe_assert(vm->xe, link != list);
267
268                 fence = xe_preempt_fence_arm(to_preempt_fence_from_link(link),
269                                              q, q->compute.context,
270                                              ++q->compute.seqno);
271                 dma_fence_put(q->compute.pfence);
272                 q->compute.pfence = fence;
273         }
274 }
275
276 static int add_preempt_fences(struct xe_vm *vm, struct xe_bo *bo)
277 {
278         struct xe_exec_queue *q;
279         int err;
280
281         if (!vm->preempt.num_exec_queues)
282                 return 0;
283
284         err = xe_bo_lock(bo, true);
285         if (err)
286                 return err;
287
288         err = dma_resv_reserve_fences(bo->ttm.base.resv, vm->preempt.num_exec_queues);
289         if (err)
290                 goto out_unlock;
291
292         list_for_each_entry(q, &vm->preempt.exec_queues, compute.link)
293                 if (q->compute.pfence) {
294                         dma_resv_add_fence(bo->ttm.base.resv,
295                                            q->compute.pfence,
296                                            DMA_RESV_USAGE_BOOKKEEP);
297                 }
298
299 out_unlock:
300         xe_bo_unlock(bo);
301         return err;
302 }
303
304 static void resume_and_reinstall_preempt_fences(struct xe_vm *vm,
305                                                 struct drm_exec *exec)
306 {
307         struct xe_exec_queue *q;
308
309         lockdep_assert_held(&vm->lock);
310         xe_vm_assert_held(vm);
311
312         list_for_each_entry(q, &vm->preempt.exec_queues, compute.link) {
313                 q->ops->resume(q);
314
315                 drm_gpuvm_resv_add_fence(&vm->gpuvm, exec, q->compute.pfence,
316                                          DMA_RESV_USAGE_BOOKKEEP, DMA_RESV_USAGE_BOOKKEEP);
317         }
318 }
319
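/*
 * Add a compute (preempt-fence mode) exec queue to the VM: allocate its
 * preempt fence, install the fence in the VM's reservation object and, if a
 * preemption or userptr invalidation is already in flight, enable signalling
 * on the new fence immediately so it syncs up with the VM's other preempt
 * fences.
 */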
320 int xe_vm_add_compute_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q)
321 {
322         struct drm_gpuvm_exec vm_exec = {
323                 .vm = &vm->gpuvm,
324                 .flags = DRM_EXEC_INTERRUPTIBLE_WAIT,
325                 .num_fences = 1,
326         };
327         struct drm_exec *exec = &vm_exec.exec;
328         struct dma_fence *pfence;
329         int err;
330         bool wait;
331
332         xe_assert(vm->xe, xe_vm_in_preempt_fence_mode(vm));
333
334         down_write(&vm->lock);
335         err = drm_gpuvm_exec_lock(&vm_exec);
336         if (err)
337                 goto out_up_write;
338
339         pfence = xe_preempt_fence_create(q, q->compute.context,
340                                          ++q->compute.seqno);
341         if (!pfence) {
342                 err = -ENOMEM;
343                 goto out_fini;
344         }
345
346         list_add(&q->compute.link, &vm->preempt.exec_queues);
347         ++vm->preempt.num_exec_queues;
348         q->compute.pfence = pfence;
349
350         down_read(&vm->userptr.notifier_lock);
351
352         drm_gpuvm_resv_add_fence(&vm->gpuvm, exec, pfence,
353                                  DMA_RESV_USAGE_BOOKKEEP, DMA_RESV_USAGE_BOOKKEEP);
354
355         /*
356          * Check whether a preemption on the VM or a userptr invalidation is
357          * in flight; if so, trigger this preempt fence to sync state with the
358          * other preempt fences on the VM.
359          */
360         wait = __xe_vm_userptr_needs_repin(vm) || preempt_fences_waiting(vm);
361         if (wait)
362                 dma_fence_enable_sw_signaling(pfence);
363
364         up_read(&vm->userptr.notifier_lock);
365
366 out_fini:
367         drm_exec_fini(exec);
368 out_up_write:
369         up_write(&vm->lock);
370
371         return err;
372 }
373
374 /**
375  * xe_vm_remove_compute_exec_queue() - Remove compute exec queue from VM
376  * @vm: The VM.
377  * @q: The exec_queue
378  */
379 void xe_vm_remove_compute_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q)
380 {
381         if (!xe_vm_in_preempt_fence_mode(vm))
382                 return;
383
384         down_write(&vm->lock);
385         list_del(&q->compute.link);
386         --vm->preempt.num_exec_queues;
387         if (q->compute.pfence) {
388                 dma_fence_enable_sw_signaling(q->compute.pfence);
389                 dma_fence_put(q->compute.pfence);
390                 q->compute.pfence = NULL;
391         }
392         up_write(&vm->lock);
393 }
394
395 /**
396  * __xe_vm_userptr_needs_repin() - Check whether the VM does have userptrs
397  * that need repinning.
398  * @vm: The VM.
399  *
400  * This function checks whether the VM has userptrs that need repinning,
401  * and provides a release-type barrier on the userptr.notifier_lock after
402  * checking.
403  *
404  * Return: 0 if there are no userptrs needing repinning, -EAGAIN if there are.
405  */
406 int __xe_vm_userptr_needs_repin(struct xe_vm *vm)
407 {
408         lockdep_assert_held_read(&vm->userptr.notifier_lock);
409
410         return (list_empty(&vm->userptr.repin_list) &&
411                 list_empty(&vm->userptr.invalidated)) ? 0 : -EAGAIN;
412 }
413
414 #define XE_VM_REBIND_RETRY_TIMEOUT_MS 1000
415
416 static void xe_vm_kill(struct xe_vm *vm)
417 {
418         struct xe_exec_queue *q;
419
420         lockdep_assert_held(&vm->lock);
421
422         xe_vm_lock(vm, false);
423         vm->flags |= XE_VM_FLAG_BANNED;
424         trace_xe_vm_kill(vm);
425
426         list_for_each_entry(q, &vm->preempt.exec_queues, compute.link)
427                 q->ops->kill(q);
428         xe_vm_unlock(vm);
429
430         /* TODO: Inform user the VM is banned */
431 }
432
433 /**
434  * xe_vm_validate_should_retry() - Whether to retry after a validate error.
435  * @exec: The drm_exec object used for locking before validation.
436  * @err: The error returned from ttm_bo_validate().
437  * @end: A ktime_t cookie that should be set to 0 before first use and
438  * that should be reused on subsequent calls.
439  *
440  * With multiple active VMs, under memory pressure, it is possible that
441  * ttm_bo_validate() runs into -EDEADLK and in such a case returns -ENOMEM.
442  * Until ttm properly handles locking in such scenarios, the best thing the
443  * driver can do is retry with a timeout. Check if that is necessary, and
444  * if so unlock the drm_exec's objects while keeping the ticket to prepare
445  * for a rerun.
446  *
447  * Return: true if a retry after drm_exec_init() is recommended;
448  * false otherwise.
449  */
450 bool xe_vm_validate_should_retry(struct drm_exec *exec, int err, ktime_t *end)
451 {
452         ktime_t cur;
453
454         if (err != -ENOMEM)
455                 return false;
456
457         cur = ktime_get();
458         *end = *end ? : ktime_add_ms(cur, XE_VM_REBIND_RETRY_TIMEOUT_MS);
459         if (!ktime_before(cur, *end))
460                 return false;
461
462         msleep(20);
463         return true;
464 }
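/*
 * Hypothetical usage sketch of the retry cookie (compare
 * preempt_rebind_work_func() below): @end starts at 0 and is passed back in
 * on every retry so the timeout is measured from the first failure:
 *
 *	ktime_t end = 0;
 *
 * retry:
 *	err = ...validate within a drm_exec transaction...;
 *	if (err && xe_vm_validate_should_retry(&exec, err, &end))
 *		goto retry;
 */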
465
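/*
 * drm_gpuvm vm_bo_validate() callback: move every VMA mapping the evicted
 * BO onto the VM's rebind list, then revalidate the BO and clear its
 * evicted flag on success.
 */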
466 static int xe_gpuvm_validate(struct drm_gpuvm_bo *vm_bo, struct drm_exec *exec)
467 {
468         struct xe_vm *vm = gpuvm_to_vm(vm_bo->vm);
469         struct drm_gpuva *gpuva;
470         int ret;
471
472         lockdep_assert_held(&vm->lock);
473         drm_gpuvm_bo_for_each_va(gpuva, vm_bo)
474                 list_move_tail(&gpuva_to_vma(gpuva)->combined_links.rebind,
475                                &vm->rebind_list);
476
477         ret = xe_bo_validate(gem_to_xe_bo(vm_bo->obj), vm, false);
478         if (ret)
479                 return ret;
480
481         vm_bo->evicted = false;
482         return 0;
483 }
484
485 static int xe_preempt_work_begin(struct drm_exec *exec, struct xe_vm *vm,
486                                  bool *done)
487 {
488         int err;
489
490         /*
491          * 1 fence for each preempt fence plus a fence for each tile from a
492          * possible rebind
493          */
494         err = drm_gpuvm_prepare_vm(&vm->gpuvm, exec, vm->preempt.num_exec_queues +
495                                    vm->xe->info.tile_count);
496         if (err)
497                 return err;
498
499         if (xe_vm_is_idle(vm)) {
500                 vm->preempt.rebind_deactivated = true;
501                 *done = true;
502                 return 0;
503         }
504
505         if (!preempt_fences_waiting(vm)) {
506                 *done = true;
507                 return 0;
508         }
509
510         err = drm_gpuvm_prepare_objects(&vm->gpuvm, exec, vm->preempt.num_exec_queues);
511         if (err)
512                 return err;
513
514         err = wait_for_existing_preempt_fences(vm);
515         if (err)
516                 return err;
517
518         return drm_gpuvm_validate(&vm->gpuvm, exec);
519 }
520
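/*
 * Preempt-fence mode rebind worker: repin invalidated userptrs, lock and
 * validate the VM's objects, rebind everything on the rebind list and wait
 * for those binds, then (past the "point of no return" below) arm fresh
 * preempt fences and resume the exec queues. -EAGAIN restarts the sequence;
 * any other error bans the VM.
 */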
521 static void preempt_rebind_work_func(struct work_struct *w)
522 {
523         struct xe_vm *vm = container_of(w, struct xe_vm, preempt.rebind_work);
524         struct drm_exec exec;
525         unsigned int fence_count = 0;
526         LIST_HEAD(preempt_fences);
527         ktime_t end = 0;
528         int err = 0;
529         long wait;
530         int __maybe_unused tries = 0;
531
532         xe_assert(vm->xe, xe_vm_in_preempt_fence_mode(vm));
533         trace_xe_vm_rebind_worker_enter(vm);
534
535         down_write(&vm->lock);
536
537         if (xe_vm_is_closed_or_banned(vm)) {
538                 up_write(&vm->lock);
539                 trace_xe_vm_rebind_worker_exit(vm);
540                 return;
541         }
542
543 retry:
544         if (xe_vm_userptr_check_repin(vm)) {
545                 err = xe_vm_userptr_pin(vm);
546                 if (err)
547                         goto out_unlock_outer;
548         }
549
550         drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0);
551
552         drm_exec_until_all_locked(&exec) {
553                 bool done = false;
554
555                 err = xe_preempt_work_begin(&exec, vm, &done);
556                 drm_exec_retry_on_contention(&exec);
557                 if (err || done) {
558                         drm_exec_fini(&exec);
559                         if (err && xe_vm_validate_should_retry(&exec, err, &end))
560                                 err = -EAGAIN;
561
562                         goto out_unlock_outer;
563                 }
564         }
565
566         err = alloc_preempt_fences(vm, &preempt_fences, &fence_count);
567         if (err)
568                 goto out_unlock;
569
570         err = xe_vm_rebind(vm, true);
571         if (err)
572                 goto out_unlock;
573
574         /* Wait on rebinds and munmap style VM unbinds */
575         wait = dma_resv_wait_timeout(xe_vm_resv(vm),
576                                      DMA_RESV_USAGE_KERNEL,
577                                      false, MAX_SCHEDULE_TIMEOUT);
578         if (wait <= 0) {
579                 err = -ETIME;
580                 goto out_unlock;
581         }
582
583 #define retry_required(__tries, __vm) \
584         (IS_ENABLED(CONFIG_DRM_XE_USERPTR_INVAL_INJECT) ? \
585         (!(__tries)++ || __xe_vm_userptr_needs_repin(__vm)) : \
586         __xe_vm_userptr_needs_repin(__vm))
587
588         down_read(&vm->userptr.notifier_lock);
589         if (retry_required(tries, vm)) {
590                 up_read(&vm->userptr.notifier_lock);
591                 err = -EAGAIN;
592                 goto out_unlock;
593         }
594
595 #undef retry_required
596
597         spin_lock(&vm->xe->ttm.lru_lock);
598         ttm_lru_bulk_move_tail(&vm->lru_bulk_move);
599         spin_unlock(&vm->xe->ttm.lru_lock);
600
601         /* Point of no return. */
602         arm_preempt_fences(vm, &preempt_fences);
603         resume_and_reinstall_preempt_fences(vm, &exec);
604         up_read(&vm->userptr.notifier_lock);
605
606 out_unlock:
607         drm_exec_fini(&exec);
608 out_unlock_outer:
609         if (err == -EAGAIN) {
610                 trace_xe_vm_rebind_worker_retry(vm);
611                 goto retry;
612         }
613
614         if (err) {
615                 drm_warn(&vm->xe->drm, "VM worker error: %d\n", err);
616                 xe_vm_kill(vm);
617         }
618         up_write(&vm->lock);
619
620         free_preempt_fences(&preempt_fences);
621
622         trace_xe_vm_rebind_worker_exit(vm);
623 }
624
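/*
 * MMU interval notifier callback for userptr VMAs: bump the notifier
 * sequence under the notifier lock, queue the VMA for repin when not in
 * fault mode, force the VM's bookkeep fences to signal and wait for them,
 * and in fault mode invalidate the GPU mappings immediately.
 */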
625 static bool vma_userptr_invalidate(struct mmu_interval_notifier *mni,
626                                    const struct mmu_notifier_range *range,
627                                    unsigned long cur_seq)
628 {
629         struct xe_userptr *userptr = container_of(mni, typeof(*userptr), notifier);
630         struct xe_userptr_vma *uvma = container_of(userptr, typeof(*uvma), userptr);
631         struct xe_vma *vma = &uvma->vma;
632         struct xe_vm *vm = xe_vma_vm(vma);
633         struct dma_resv_iter cursor;
634         struct dma_fence *fence;
635         long err;
636
637         xe_assert(vm->xe, xe_vma_is_userptr(vma));
638         trace_xe_vma_userptr_invalidate(vma);
639
640         if (!mmu_notifier_range_blockable(range))
641                 return false;
642
643         down_write(&vm->userptr.notifier_lock);
644         mmu_interval_set_seq(mni, cur_seq);
645
646         /* No need to stop gpu access if the userptr is not yet bound. */
647         if (!userptr->initial_bind) {
648                 up_write(&vm->userptr.notifier_lock);
649                 return true;
650         }
651
652         /*
653          * Tell exec and rebind worker they need to repin and rebind this
654          * userptr.
655          */
656         if (!xe_vm_in_fault_mode(vm) &&
657             !(vma->gpuva.flags & XE_VMA_DESTROYED) && vma->tile_present) {
658                 spin_lock(&vm->userptr.invalidated_lock);
659                 list_move_tail(&userptr->invalidate_link,
660                                &vm->userptr.invalidated);
661                 spin_unlock(&vm->userptr.invalidated_lock);
662         }
663
664         up_write(&vm->userptr.notifier_lock);
665
666         /*
667          * Preempt fences turn into schedule disables, pipeline these.
668          * Note that even in fault mode, we need to wait for binds and
669          * unbinds to complete, and those are attached as BOOKKEEP fences
670          * to the vm.
671          */
672         dma_resv_iter_begin(&cursor, xe_vm_resv(vm),
673                             DMA_RESV_USAGE_BOOKKEEP);
674         dma_resv_for_each_fence_unlocked(&cursor, fence)
675                 dma_fence_enable_sw_signaling(fence);
676         dma_resv_iter_end(&cursor);
677
678         err = dma_resv_wait_timeout(xe_vm_resv(vm),
679                                     DMA_RESV_USAGE_BOOKKEEP,
680                                     false, MAX_SCHEDULE_TIMEOUT);
681         XE_WARN_ON(err <= 0);
682
683         if (xe_vm_in_fault_mode(vm)) {
684                 err = xe_vm_invalidate_vma(vma);
685                 XE_WARN_ON(err);
686         }
687
688         trace_xe_vma_userptr_invalidate_complete(vma);
689
690         return true;
691 }
692
693 static const struct mmu_interval_notifier_ops vma_userptr_notifier_ops = {
694         .invalidate = vma_userptr_invalidate,
695 };
696
697 int xe_vm_userptr_pin(struct xe_vm *vm)
698 {
699         struct xe_userptr_vma *uvma, *next;
700         int err = 0;
701         LIST_HEAD(tmp_evict);
702
703         xe_assert(vm->xe, !xe_vm_in_fault_mode(vm));
704         lockdep_assert_held_write(&vm->lock);
705
706         /* Collect invalidated userptrs */
707         spin_lock(&vm->userptr.invalidated_lock);
708         list_for_each_entry_safe(uvma, next, &vm->userptr.invalidated,
709                                  userptr.invalidate_link) {
710                 list_del_init(&uvma->userptr.invalidate_link);
711                 list_move_tail(&uvma->userptr.repin_link,
712                                &vm->userptr.repin_list);
713         }
714         spin_unlock(&vm->userptr.invalidated_lock);
715
716         /* Pin and move to temporary list */
717         list_for_each_entry_safe(uvma, next, &vm->userptr.repin_list,
718                                  userptr.repin_link) {
719                 err = xe_vma_userptr_pin_pages(uvma);
720                 if (err == -EFAULT) {
721                         list_del_init(&uvma->userptr.repin_link);
722
723                         /* Wait for pending binds */
724                         xe_vm_lock(vm, false);
725                         dma_resv_wait_timeout(xe_vm_resv(vm),
726                                               DMA_RESV_USAGE_BOOKKEEP,
727                                               false, MAX_SCHEDULE_TIMEOUT);
728
729                         err = xe_vm_invalidate_vma(&uvma->vma);
730                         xe_vm_unlock(vm);
731                         if (err)
732                                 return err;
733                 } else {
734                         if (err < 0)
735                                 return err;
736
737                         list_del_init(&uvma->userptr.repin_link);
738                         list_move_tail(&uvma->vma.combined_links.rebind,
739                                        &vm->rebind_list);
740                 }
741         }
742
743         return 0;
744 }
745
746 /**
747  * xe_vm_userptr_check_repin() - Check whether the VM might have userptrs
748  * that need repinning.
749  * @vm: The VM.
750  *
751  * This function does an advisory check for whether the VM has userptrs that
752  * need repinning.
753  *
754  * Return: 0 if there are no indications of userptrs needing repinning,
755  * -EAGAIN if there are.
756  */
757 int xe_vm_userptr_check_repin(struct xe_vm *vm)
758 {
759         return (list_empty_careful(&vm->userptr.repin_list) &&
760                 list_empty_careful(&vm->userptr.invalidated)) ? 0 : -EAGAIN;
761 }
762
763 static struct dma_fence *
764 xe_vm_bind_vma(struct xe_vma *vma, struct xe_exec_queue *q,
765                struct xe_sync_entry *syncs, u32 num_syncs,
766                bool first_op, bool last_op);
767
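/*
 * Rebind every VMA on the VM's rebind list (populated by eviction/validation
 * and by userptr repinning). Skipped for long-running VMs unless called from
 * the rebind worker itself.
 */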
768 int xe_vm_rebind(struct xe_vm *vm, bool rebind_worker)
769 {
770         struct dma_fence *fence;
771         struct xe_vma *vma, *next;
772
773         lockdep_assert_held(&vm->lock);
774         if (xe_vm_in_lr_mode(vm) && !rebind_worker)
775                 return 0;
776
777         xe_vm_assert_held(vm);
778         list_for_each_entry_safe(vma, next, &vm->rebind_list,
779                                  combined_links.rebind) {
780                 xe_assert(vm->xe, vma->tile_present);
781
782                 list_del_init(&vma->combined_links.rebind);
783                 if (rebind_worker)
784                         trace_xe_vma_rebind_worker(vma);
785                 else
786                         trace_xe_vma_rebind_exec(vma);
787                 fence = xe_vm_bind_vma(vma, NULL, NULL, 0, false, false);
788                 if (IS_ERR(fence))
789                         return PTR_ERR(fence);
790                 dma_fence_put(fence);
791         }
792
793         return 0;
794 }
795
796 static void xe_vma_free(struct xe_vma *vma)
797 {
798         if (xe_vma_is_userptr(vma))
799                 kfree(to_userptr_vma(vma));
800         else
801                 kfree(vma);
802 }
803
804 #define VMA_CREATE_FLAG_READ_ONLY       BIT(0)
805 #define VMA_CREATE_FLAG_IS_NULL         BIT(1)
806 #define VMA_CREATE_FLAG_DUMPABLE        BIT(2)
807
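/*
 * Allocate and initialize a VMA. A BO-backed VMA takes a reference on the
 * BO and links into its drm_gpuvm_bo; a userptr VMA registers an MMU
 * interval notifier instead; a NULL (sparse) VMA has neither backing.
 * Userptr and NULL VMAs additionally hold a reference on the VM.
 */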
808 static struct xe_vma *xe_vma_create(struct xe_vm *vm,
809                                     struct xe_bo *bo,
810                                     u64 bo_offset_or_userptr,
811                                     u64 start, u64 end,
812                                     u16 pat_index, unsigned int flags)
813 {
814         struct xe_vma *vma;
815         struct xe_tile *tile;
816         u8 id;
817         bool read_only = (flags & VMA_CREATE_FLAG_READ_ONLY);
818         bool is_null = (flags & VMA_CREATE_FLAG_IS_NULL);
819         bool dumpable = (flags & VMA_CREATE_FLAG_DUMPABLE);
820
821         xe_assert(vm->xe, start < end);
822         xe_assert(vm->xe, end < vm->size);
823
824         /*
825          * Allocate and ensure that the xe_vma_is_userptr() return
826          * matches what was allocated.
827          */
828         if (!bo && !is_null) {
829                 struct xe_userptr_vma *uvma = kzalloc(sizeof(*uvma), GFP_KERNEL);
830
831                 if (!uvma)
832                         return ERR_PTR(-ENOMEM);
833
834                 vma = &uvma->vma;
835         } else {
836                 vma = kzalloc(sizeof(*vma), GFP_KERNEL);
837                 if (!vma)
838                         return ERR_PTR(-ENOMEM);
839
840                 if (is_null)
841                         vma->gpuva.flags |= DRM_GPUVA_SPARSE;
842                 if (bo)
843                         vma->gpuva.gem.obj = &bo->ttm.base;
844         }
845
846         INIT_LIST_HEAD(&vma->combined_links.rebind);
847
848         INIT_LIST_HEAD(&vma->gpuva.gem.entry);
849         vma->gpuva.vm = &vm->gpuvm;
850         vma->gpuva.va.addr = start;
851         vma->gpuva.va.range = end - start + 1;
852         if (read_only)
853                 vma->gpuva.flags |= XE_VMA_READ_ONLY;
854         if (dumpable)
855                 vma->gpuva.flags |= XE_VMA_DUMPABLE;
856
857         for_each_tile(tile, vm->xe, id)
858                 vma->tile_mask |= 0x1 << id;
859
860         if (GRAPHICS_VER(vm->xe) >= 20 || vm->xe->info.platform == XE_PVC)
861                 vma->gpuva.flags |= XE_VMA_ATOMIC_PTE_BIT;
862
863         vma->pat_index = pat_index;
864
865         if (bo) {
866                 struct drm_gpuvm_bo *vm_bo;
867
868                 xe_bo_assert_held(bo);
869
870                 vm_bo = drm_gpuvm_bo_obtain(vma->gpuva.vm, &bo->ttm.base);
871                 if (IS_ERR(vm_bo)) {
872                         xe_vma_free(vma);
873                         return ERR_CAST(vm_bo);
874                 }
875
876                 drm_gpuvm_bo_extobj_add(vm_bo);
877                 drm_gem_object_get(&bo->ttm.base);
878                 vma->gpuva.gem.offset = bo_offset_or_userptr;
879                 drm_gpuva_link(&vma->gpuva, vm_bo);
880                 drm_gpuvm_bo_put(vm_bo);
881         } else /* userptr or null */ {
882                 if (!is_null) {
883                         struct xe_userptr *userptr = &to_userptr_vma(vma)->userptr;
884                         u64 size = end - start + 1;
885                         int err;
886
887                         INIT_LIST_HEAD(&userptr->invalidate_link);
888                         INIT_LIST_HEAD(&userptr->repin_link);
889                         vma->gpuva.gem.offset = bo_offset_or_userptr;
890
891                         err = mmu_interval_notifier_insert(&userptr->notifier,
892                                                            current->mm,
893                                                            xe_vma_userptr(vma), size,
894                                                            &vma_userptr_notifier_ops);
895                         if (err) {
896                                 xe_vma_free(vma);
897                                 return ERR_PTR(err);
898                         }
899
900                         userptr->notifier_seq = LONG_MAX;
901                 }
902
903                 xe_vm_get(vm);
904         }
905
906         return vma;
907 }
908
909 static void xe_vma_destroy_late(struct xe_vma *vma)
910 {
911         struct xe_vm *vm = xe_vma_vm(vma);
912         struct xe_device *xe = vm->xe;
913         bool read_only = xe_vma_read_only(vma);
914
915         if (vma->ufence) {
916                 xe_sync_ufence_put(vma->ufence);
917                 vma->ufence = NULL;
918         }
919
920         if (xe_vma_is_userptr(vma)) {
921                 struct xe_userptr *userptr = &to_userptr_vma(vma)->userptr;
922
923                 if (userptr->sg) {
924                         dma_unmap_sgtable(xe->drm.dev,
925                                           userptr->sg,
926                                           read_only ? DMA_TO_DEVICE :
927                                           DMA_BIDIRECTIONAL, 0);
928                         sg_free_table(userptr->sg);
929                         userptr->sg = NULL;
930                 }
931
932                 /*
933                  * Since userptr pages are not pinned, we can't remove
934                  * the notifier until we're sure the GPU is not accessing
935                  * them anymore.
936                  */
937                 mmu_interval_notifier_remove(&userptr->notifier);
938                 xe_vm_put(vm);
939         } else if (xe_vma_is_null(vma)) {
940                 xe_vm_put(vm);
941         } else {
942                 xe_bo_put(xe_vma_bo(vma));
943         }
944
945         xe_vma_free(vma);
946 }
947
948 static void vma_destroy_work_func(struct work_struct *w)
949 {
950         struct xe_vma *vma =
951                 container_of(w, struct xe_vma, destroy_work);
952
953         xe_vma_destroy_late(vma);
954 }
955
956 static void vma_destroy_cb(struct dma_fence *fence,
957                            struct dma_fence_cb *cb)
958 {
959         struct xe_vma *vma = container_of(cb, struct xe_vma, destroy_cb);
960
961         INIT_WORK(&vma->destroy_work, vma_destroy_work_func);
962         queue_work(system_unbound_wq, &vma->destroy_work);
963 }
964
965 static void xe_vma_destroy(struct xe_vma *vma, struct dma_fence *fence)
966 {
967         struct xe_vm *vm = xe_vma_vm(vma);
968
969         lockdep_assert_held_write(&vm->lock);
970         xe_assert(vm->xe, list_empty(&vma->combined_links.destroy));
971
972         if (xe_vma_is_userptr(vma)) {
973                 xe_assert(vm->xe, vma->gpuva.flags & XE_VMA_DESTROYED);
974
975                 spin_lock(&vm->userptr.invalidated_lock);
976                 list_del(&to_userptr_vma(vma)->userptr.invalidate_link);
977                 spin_unlock(&vm->userptr.invalidated_lock);
978         } else if (!xe_vma_is_null(vma)) {
979                 xe_bo_assert_held(xe_vma_bo(vma));
980
981                 drm_gpuva_unlink(&vma->gpuva);
982         }
983
984         xe_vm_assert_held(vm);
985         if (fence) {
986                 int ret = dma_fence_add_callback(fence, &vma->destroy_cb,
987                                                  vma_destroy_cb);
988
989                 if (ret) {
990                         XE_WARN_ON(ret != -ENOENT);
991                         xe_vma_destroy_late(vma);
992                 }
993         } else {
994                 xe_vma_destroy_late(vma);
995         }
996 }
997
998 /**
999  * xe_vm_prepare_vma() - drm_exec utility to lock a vma
1000  * @exec: The drm_exec object we're currently locking for.
1001  * @vma: The vma for which we want to lock the vm resv and any attached
1002  * object's resv.
1003  * @num_shared: The number of dma-fence slots to pre-allocate in the
1004  * objects' reservation objects.
1005  *
1006  * Return: 0 on success, negative error code on error. In particular
1007  * may return -EDEADLK on WW transaction contention and -EINTR if
1008  * an interruptible wait is terminated by a signal.
1009  */
1010 int xe_vm_prepare_vma(struct drm_exec *exec, struct xe_vma *vma,
1011                       unsigned int num_shared)
1012 {
1013         struct xe_vm *vm = xe_vma_vm(vma);
1014         struct xe_bo *bo = xe_vma_bo(vma);
1015         int err;
1016
1017         XE_WARN_ON(!vm);
1018         if (num_shared)
1019                 err = drm_exec_prepare_obj(exec, xe_vm_obj(vm), num_shared);
1020         else
1021                 err = drm_exec_lock_obj(exec, xe_vm_obj(vm));
1022         if (!err && bo && !bo->vm) {
1023                 if (num_shared)
1024                         err = drm_exec_prepare_obj(exec, &bo->ttm.base, num_shared);
1025                 else
1026                         err = drm_exec_lock_obj(exec, &bo->ttm.base);
1027         }
1028
1029         return err;
1030 }
1031
1032 static void xe_vma_destroy_unlocked(struct xe_vma *vma)
1033 {
1034         struct drm_exec exec;
1035         int err;
1036
1037         drm_exec_init(&exec, 0, 0);
1038         drm_exec_until_all_locked(&exec) {
1039                 err = xe_vm_prepare_vma(&exec, vma, 0);
1040                 drm_exec_retry_on_contention(&exec);
1041                 if (XE_WARN_ON(err))
1042                         break;
1043         }
1044
1045         xe_vma_destroy(vma, NULL);
1046
1047         drm_exec_fini(&exec);
1048 }
1049
1050 struct xe_vma *
1051 xe_vm_find_overlapping_vma(struct xe_vm *vm, u64 start, u64 range)
1052 {
1053         struct drm_gpuva *gpuva;
1054
1055         lockdep_assert_held(&vm->lock);
1056
1057         if (xe_vm_is_closed_or_banned(vm))
1058                 return NULL;
1059
1060         xe_assert(vm->xe, start + range <= vm->size);
1061
1062         gpuva = drm_gpuva_find_first(&vm->gpuvm, start, range);
1063
1064         return gpuva ? gpuva_to_vma(gpuva) : NULL;
1065 }
1066
1067 static int xe_vm_insert_vma(struct xe_vm *vm, struct xe_vma *vma)
1068 {
1069         int err;
1070
1071         xe_assert(vm->xe, xe_vma_vm(vma) == vm);
1072         lockdep_assert_held(&vm->lock);
1073
1074         mutex_lock(&vm->snap_mutex);
1075         err = drm_gpuva_insert(&vm->gpuvm, &vma->gpuva);
1076         mutex_unlock(&vm->snap_mutex);
1077         XE_WARN_ON(err);        /* Shouldn't be possible */
1078
1079         return err;
1080 }
1081
1082 static void xe_vm_remove_vma(struct xe_vm *vm, struct xe_vma *vma)
1083 {
1084         xe_assert(vm->xe, xe_vma_vm(vma) == vm);
1085         lockdep_assert_held(&vm->lock);
1086
1087         mutex_lock(&vm->snap_mutex);
1088         drm_gpuva_remove(&vma->gpuva);
1089         mutex_unlock(&vm->snap_mutex);
1090         if (vm->usm.last_fault_vma == vma)
1091                 vm->usm.last_fault_vma = NULL;
1092 }
1093
1094 static struct drm_gpuva_op *xe_vm_op_alloc(void)
1095 {
1096         struct xe_vma_op *op;
1097
1098         op = kzalloc(sizeof(*op), GFP_KERNEL);
1099
1100         if (unlikely(!op))
1101                 return NULL;
1102
1103         return &op->base;
1104 }
1105
1106 static void xe_vm_free(struct drm_gpuvm *gpuvm);
1107
1108 static const struct drm_gpuvm_ops gpuvm_ops = {
1109         .op_alloc = xe_vm_op_alloc,
1110         .vm_bo_validate = xe_gpuvm_validate,
1111         .vm_free = xe_vm_free,
1112 };
1113
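/*
 * The PAT index is not stored as a contiguous field: its bits are scattered
 * across individual PTE/PDE bits (PAT0..PAT4 below), and the bit used for
 * PAT2 differs between leaf PTEs and PDE/PDPE entries.
 */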
1114 static u64 pde_encode_pat_index(struct xe_device *xe, u16 pat_index)
1115 {
1116         u64 pte = 0;
1117
1118         if (pat_index & BIT(0))
1119                 pte |= XE_PPGTT_PTE_PAT0;
1120
1121         if (pat_index & BIT(1))
1122                 pte |= XE_PPGTT_PTE_PAT1;
1123
1124         return pte;
1125 }
1126
1127 static u64 pte_encode_pat_index(struct xe_device *xe, u16 pat_index,
1128                                 u32 pt_level)
1129 {
1130         u64 pte = 0;
1131
1132         if (pat_index & BIT(0))
1133                 pte |= XE_PPGTT_PTE_PAT0;
1134
1135         if (pat_index & BIT(1))
1136                 pte |= XE_PPGTT_PTE_PAT1;
1137
1138         if (pat_index & BIT(2)) {
1139                 if (pt_level)
1140                         pte |= XE_PPGTT_PDE_PDPE_PAT2;
1141                 else
1142                         pte |= XE_PPGTT_PTE_PAT2;
1143         }
1144
1145         if (pat_index & BIT(3))
1146                 pte |= XELPG_PPGTT_PTE_PAT3;
1147
1148         if (pat_index & (BIT(4)))
1149                 pte |= XE2_PPGTT_PTE_PAT4;
1150
1151         return pte;
1152 }
1153
1154 static u64 pte_encode_ps(u32 pt_level)
1155 {
1156         XE_WARN_ON(pt_level > MAX_HUGEPTE_LEVEL);
1157
1158         if (pt_level == 1)
1159                 return XE_PDE_PS_2M;
1160         else if (pt_level == 2)
1161                 return XE_PDPE_PS_1G;
1162
1163         return 0;
1164 }
1165
1166 static u64 xelp_pde_encode_bo(struct xe_bo *bo, u64 bo_offset,
1167                               const u16 pat_index)
1168 {
1169         struct xe_device *xe = xe_bo_device(bo);
1170         u64 pde;
1171
1172         pde = xe_bo_addr(bo, bo_offset, XE_PAGE_SIZE);
1173         pde |= XE_PAGE_PRESENT | XE_PAGE_RW;
1174         pde |= pde_encode_pat_index(xe, pat_index);
1175
1176         return pde;
1177 }
1178
1179 static u64 xelp_pte_encode_bo(struct xe_bo *bo, u64 bo_offset,
1180                               u16 pat_index, u32 pt_level)
1181 {
1182         struct xe_device *xe = xe_bo_device(bo);
1183         u64 pte;
1184
1185         pte = xe_bo_addr(bo, bo_offset, XE_PAGE_SIZE);
1186         pte |= XE_PAGE_PRESENT | XE_PAGE_RW;
1187         pte |= pte_encode_pat_index(xe, pat_index, pt_level);
1188         pte |= pte_encode_ps(pt_level);
1189
1190         if (xe_bo_is_vram(bo) || xe_bo_is_stolen_devmem(bo))
1191                 pte |= XE_PPGTT_PTE_DM;
1192
1193         return pte;
1194 }
1195
1196 static u64 xelp_pte_encode_vma(u64 pte, struct xe_vma *vma,
1197                                u16 pat_index, u32 pt_level)
1198 {
1199         struct xe_device *xe = xe_vma_vm(vma)->xe;
1200
1201         pte |= XE_PAGE_PRESENT;
1202
1203         if (likely(!xe_vma_read_only(vma)))
1204                 pte |= XE_PAGE_RW;
1205
1206         pte |= pte_encode_pat_index(xe, pat_index, pt_level);
1207         pte |= pte_encode_ps(pt_level);
1208
1209         if (unlikely(xe_vma_is_null(vma)))
1210                 pte |= XE_PTE_NULL;
1211
1212         return pte;
1213 }
1214
1215 static u64 xelp_pte_encode_addr(struct xe_device *xe, u64 addr,
1216                                 u16 pat_index,
1217                                 u32 pt_level, bool devmem, u64 flags)
1218 {
1219         u64 pte;
1220
1221         /* Avoid passing random bits directly as flags */
1222         xe_assert(xe, !(flags & ~XE_PTE_PS64));
1223
1224         pte = addr;
1225         pte |= XE_PAGE_PRESENT | XE_PAGE_RW;
1226         pte |= pte_encode_pat_index(xe, pat_index, pt_level);
1227         pte |= pte_encode_ps(pt_level);
1228
1229         if (devmem)
1230                 pte |= XE_PPGTT_PTE_DM;
1231
1232         pte |= flags;
1233
1234         return pte;
1235 }
1236
1237 static const struct xe_pt_ops xelp_pt_ops = {
1238         .pte_encode_bo = xelp_pte_encode_bo,
1239         .pte_encode_vma = xelp_pte_encode_vma,
1240         .pte_encode_addr = xelp_pte_encode_addr,
1241         .pde_encode_bo = xelp_pde_encode_bo,
1242 };
1243
1244 static void vm_destroy_work_func(struct work_struct *w);
1245
1246 /**
1247  * xe_vm_create_scratch() - Setup a scratch memory pagetable tree for the
1248  * given tile and vm.
1249  * @xe: xe device.
1250  * @tile: tile to set up for.
1251  * @vm: vm to set up for.
1252  *
1253  * Sets up a pagetable tree with one page-table per level and a single
1254  * leaf PTE. All pagetable entries point to the single page-table or,
1255  * for MAX_HUGEPTE_LEVEL, a NULL huge PTE that returns 0 on reads and
1256  * turns writes into NOPs.
1257  *
1258  * Return: 0 on success, negative error code on error.
1259  */
1260 static int xe_vm_create_scratch(struct xe_device *xe, struct xe_tile *tile,
1261                                 struct xe_vm *vm)
1262 {
1263         u8 id = tile->id;
1264         int i;
1265
1266         for (i = MAX_HUGEPTE_LEVEL; i < vm->pt_root[id]->level; i++) {
1267                 vm->scratch_pt[id][i] = xe_pt_create(vm, tile, i);
1268                 if (IS_ERR(vm->scratch_pt[id][i]))
1269                         return PTR_ERR(vm->scratch_pt[id][i]);
1270
1271                 xe_pt_populate_empty(tile, vm, vm->scratch_pt[id][i]);
1272         }
1273
1274         return 0;
1275 }
1276
1277 static void xe_vm_free_scratch(struct xe_vm *vm)
1278 {
1279         struct xe_tile *tile;
1280         u8 id;
1281
1282         if (!xe_vm_has_scratch(vm))
1283                 return;
1284
1285         for_each_tile(tile, vm->xe, id) {
1286                 u32 i;
1287
1288                 if (!vm->pt_root[id])
1289                         continue;
1290
1291                 for (i = MAX_HUGEPTE_LEVEL; i < vm->pt_root[id]->level; ++i)
1292                         if (vm->scratch_pt[id][i])
1293                                 xe_pt_destroy(vm->scratch_pt[id][i], vm->flags, NULL);
1294         }
1295 }
1296
1297 struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags)
1298 {
1299         struct drm_gem_object *vm_resv_obj;
1300         struct xe_vm *vm;
1301         int err, number_tiles = 0;
1302         struct xe_tile *tile;
1303         u8 id;
1304
1305         vm = kzalloc(sizeof(*vm), GFP_KERNEL);
1306         if (!vm)
1307                 return ERR_PTR(-ENOMEM);
1308
1309         vm->xe = xe;
1310
1311         vm->size = 1ull << xe->info.va_bits;
1312
1313         vm->flags = flags;
1314
1315         init_rwsem(&vm->lock);
1316         mutex_init(&vm->snap_mutex);
1317
1318         INIT_LIST_HEAD(&vm->rebind_list);
1319
1320         INIT_LIST_HEAD(&vm->userptr.repin_list);
1321         INIT_LIST_HEAD(&vm->userptr.invalidated);
1322         init_rwsem(&vm->userptr.notifier_lock);
1323         spin_lock_init(&vm->userptr.invalidated_lock);
1324
1325         INIT_WORK(&vm->destroy_work, vm_destroy_work_func);
1326
1327         INIT_LIST_HEAD(&vm->preempt.exec_queues);
1328         vm->preempt.min_run_period_ms = 10;     /* FIXME: Wire up to uAPI */
1329
1330         for_each_tile(tile, xe, id)
1331                 xe_range_fence_tree_init(&vm->rftree[id]);
1332
1333         vm->pt_ops = &xelp_pt_ops;
1334
1335         if (!(flags & XE_VM_FLAG_MIGRATION))
1336                 xe_device_mem_access_get(xe);
1337
1338         vm_resv_obj = drm_gpuvm_resv_object_alloc(&xe->drm);
1339         if (!vm_resv_obj) {
1340                 err = -ENOMEM;
1341                 goto err_no_resv;
1342         }
1343
1344         drm_gpuvm_init(&vm->gpuvm, "Xe VM", DRM_GPUVM_RESV_PROTECTED, &xe->drm,
1345                        vm_resv_obj, 0, vm->size, 0, 0, &gpuvm_ops);
1346
1347         drm_gem_object_put(vm_resv_obj);
1348
1349         err = dma_resv_lock_interruptible(xe_vm_resv(vm), NULL);
1350         if (err)
1351                 goto err_close;
1352
1353         if (IS_DGFX(xe) && xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K)
1354                 vm->flags |= XE_VM_FLAG_64K;
1355
1356         for_each_tile(tile, xe, id) {
1357                 if (flags & XE_VM_FLAG_MIGRATION &&
1358                     tile->id != XE_VM_FLAG_TILE_ID(flags))
1359                         continue;
1360
1361                 vm->pt_root[id] = xe_pt_create(vm, tile, xe->info.vm_max_level);
1362                 if (IS_ERR(vm->pt_root[id])) {
1363                         err = PTR_ERR(vm->pt_root[id]);
1364                         vm->pt_root[id] = NULL;
1365                         goto err_unlock_close;
1366                 }
1367         }
1368
1369         if (xe_vm_has_scratch(vm)) {
1370                 for_each_tile(tile, xe, id) {
1371                         if (!vm->pt_root[id])
1372                                 continue;
1373
1374                         err = xe_vm_create_scratch(xe, tile, vm);
1375                         if (err)
1376                                 goto err_unlock_close;
1377                 }
1378                 vm->batch_invalidate_tlb = true;
1379         }
1380
1381         if (flags & XE_VM_FLAG_LR_MODE) {
1382                 INIT_WORK(&vm->preempt.rebind_work, preempt_rebind_work_func);
1383                 vm->flags |= XE_VM_FLAG_LR_MODE;
1384                 vm->batch_invalidate_tlb = false;
1385         }
1386
1387         /* Fill pt_root after allocating scratch tables */
1388         for_each_tile(tile, xe, id) {
1389                 if (!vm->pt_root[id])
1390                         continue;
1391
1392                 xe_pt_populate_empty(tile, vm, vm->pt_root[id]);
1393         }
1394         dma_resv_unlock(xe_vm_resv(vm));
1395
1396         /* Kernel migration VM shouldn't have a circular loop. */
1397         if (!(flags & XE_VM_FLAG_MIGRATION)) {
1398                 for_each_tile(tile, xe, id) {
1399                         struct xe_gt *gt = tile->primary_gt;
1400                         struct xe_vm *migrate_vm;
1401                         struct xe_exec_queue *q;
1402                         u32 create_flags = EXEC_QUEUE_FLAG_VM;
1403
1404                         if (!vm->pt_root[id])
1405                                 continue;
1406
1407                         migrate_vm = xe_migrate_get_vm(tile->migrate);
1408                         q = xe_exec_queue_create_class(xe, gt, migrate_vm,
1409                                                        XE_ENGINE_CLASS_COPY,
1410                                                        create_flags);
1411                         xe_vm_put(migrate_vm);
1412                         if (IS_ERR(q)) {
1413                                 err = PTR_ERR(q);
1414                                 goto err_close;
1415                         }
1416                         vm->q[id] = q;
1417                         number_tiles++;
1418                 }
1419         }
1420
1421         if (number_tiles > 1)
1422                 vm->composite_fence_ctx = dma_fence_context_alloc(1);
1423
1424         mutex_lock(&xe->usm.lock);
1425         if (flags & XE_VM_FLAG_FAULT_MODE)
1426                 xe->usm.num_vm_in_fault_mode++;
1427         else if (!(flags & XE_VM_FLAG_MIGRATION))
1428                 xe->usm.num_vm_in_non_fault_mode++;
1429         mutex_unlock(&xe->usm.lock);
1430
1431         trace_xe_vm_create(vm);
1432
1433         return vm;
1434
1435 err_unlock_close:
1436         dma_resv_unlock(xe_vm_resv(vm));
1437 err_close:
1438         xe_vm_close_and_put(vm);
1439         return ERR_PTR(err);
1440
1441 err_no_resv:
1442         mutex_destroy(&vm->snap_mutex);
1443         for_each_tile(tile, xe, id)
1444                 xe_range_fence_tree_fini(&vm->rftree[id]);
1445         kfree(vm);
1446         if (!(flags & XE_VM_FLAG_MIGRATION))
1447                 xe_device_mem_access_put(xe);
1448         return ERR_PTR(err);
1449 }
1450
1451 static void xe_vm_close(struct xe_vm *vm)
1452 {
1453         down_write(&vm->lock);
1454         vm->size = 0;
1455         up_write(&vm->lock);
1456 }
1457
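/*
 * Tear down a VM: flush the rebind worker, kill and release the per-tile
 * bind queues, destroy all VMAs (deferring BO-backed ones whose objects
 * still need locking), free the scratch and root page tables, and drop the
 * creation reference.
 */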
1458 void xe_vm_close_and_put(struct xe_vm *vm)
1459 {
1460         LIST_HEAD(contested);
1461         struct xe_device *xe = vm->xe;
1462         struct xe_tile *tile;
1463         struct xe_vma *vma, *next_vma;
1464         struct drm_gpuva *gpuva, *next;
1465         u8 id;
1466
1467         xe_assert(xe, !vm->preempt.num_exec_queues);
1468
1469         xe_vm_close(vm);
1470         if (xe_vm_in_preempt_fence_mode(vm))
1471                 flush_work(&vm->preempt.rebind_work);
1472
1473         down_write(&vm->lock);
1474         for_each_tile(tile, xe, id) {
1475                 if (vm->q[id])
1476                         xe_exec_queue_last_fence_put(vm->q[id], vm);
1477         }
1478         up_write(&vm->lock);
1479
1480         for_each_tile(tile, xe, id) {
1481                 if (vm->q[id]) {
1482                         xe_exec_queue_kill(vm->q[id]);
1483                         xe_exec_queue_put(vm->q[id]);
1484                         vm->q[id] = NULL;
1485                 }
1486         }
1487
1488         down_write(&vm->lock);
1489         xe_vm_lock(vm, false);
1490         drm_gpuvm_for_each_va_safe(gpuva, next, &vm->gpuvm) {
1491                 vma = gpuva_to_vma(gpuva);
1492
1493                 if (xe_vma_has_no_bo(vma)) {
1494                         down_read(&vm->userptr.notifier_lock);
1495                         vma->gpuva.flags |= XE_VMA_DESTROYED;
1496                         up_read(&vm->userptr.notifier_lock);
1497                 }
1498
1499                 xe_vm_remove_vma(vm, vma);
1500
1501                 /* easy case, remove from VMA? */
1502                 if (xe_vma_has_no_bo(vma) || xe_vma_bo(vma)->vm) {
1503                         list_del_init(&vma->combined_links.rebind);
1504                         xe_vma_destroy(vma, NULL);
1505                         continue;
1506                 }
1507
1508                 list_move_tail(&vma->combined_links.destroy, &contested);
1509                 vma->gpuva.flags |= XE_VMA_DESTROYED;
1510         }
1511
1512         /*
1513          * All vm operations will add shared fences to resv.
1514          * The only exception is eviction for a shared object,
1515          * but even so, the unbind when evicted would still
1516          * install a fence to resv. Hence it's safe to
1517          * destroy the pagetables immediately.
1518          */
1519         xe_vm_free_scratch(vm);
1520
1521         for_each_tile(tile, xe, id) {
1522                 if (vm->pt_root[id]) {
1523                         xe_pt_destroy(vm->pt_root[id], vm->flags, NULL);
1524                         vm->pt_root[id] = NULL;
1525                 }
1526         }
1527         xe_vm_unlock(vm);
1528
1529         /*
1530          * VM is now dead, cannot re-add nodes to vm->vmas if it's NULL.
1531          * Since we hold a refcount to the bo, we can remove and free
1532          * the members safely without locking.
1533          */
1534         list_for_each_entry_safe(vma, next_vma, &contested,
1535                                  combined_links.destroy) {
1536                 list_del_init(&vma->combined_links.destroy);
1537                 xe_vma_destroy_unlocked(vma);
1538         }
1539
1540         up_write(&vm->lock);
1541
1542         mutex_lock(&xe->usm.lock);
1543         if (vm->flags & XE_VM_FLAG_FAULT_MODE)
1544                 xe->usm.num_vm_in_fault_mode--;
1545         else if (!(vm->flags & XE_VM_FLAG_MIGRATION))
1546                 xe->usm.num_vm_in_non_fault_mode--;
1547         mutex_unlock(&xe->usm.lock);
1548
1549         for_each_tile(tile, xe, id)
1550                 xe_range_fence_tree_fini(&vm->rftree[id]);
1551
1552         xe_vm_put(vm);
1553 }
1554
1555 static void vm_destroy_work_func(struct work_struct *w)
1556 {
1557         struct xe_vm *vm =
1558                 container_of(w, struct xe_vm, destroy_work);
1559         struct xe_device *xe = vm->xe;
1560         struct xe_tile *tile;
1561         u8 id;
1562         void *lookup;
1563
1564         /* xe_vm_close_and_put was not called? */
1565         xe_assert(xe, !vm->size);
1566
1567         mutex_destroy(&vm->snap_mutex);
1568
1569         if (!(vm->flags & XE_VM_FLAG_MIGRATION)) {
1570                 xe_device_mem_access_put(xe);
1571
1572                 if (xe->info.has_asid && vm->usm.asid) {
1573                         mutex_lock(&xe->usm.lock);
1574                         lookup = xa_erase(&xe->usm.asid_to_vm, vm->usm.asid);
1575                         xe_assert(xe, lookup == vm);
1576                         mutex_unlock(&xe->usm.lock);
1577                 }
1578         }
1579
1580         for_each_tile(tile, xe, id)
1581                 XE_WARN_ON(vm->pt_root[id]);
1582
1583         trace_xe_vm_free(vm);
1584         kfree(vm);
1585 }
1586
1587 static void xe_vm_free(struct drm_gpuvm *gpuvm)
1588 {
1589         struct xe_vm *vm = container_of(gpuvm, struct xe_vm, gpuvm);
1590
1591         /* To destroy the VM we need to be able to sleep */
1592         queue_work(system_unbound_wq, &vm->destroy_work);
1593 }
1594
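/*
 * Look up a VM by handle in the file's xarray, taking a reference on it.
 * Returns NULL if no VM with that id exists; the caller is expected to
 * balance the lookup with xe_vm_put().
 */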
1595 struct xe_vm *xe_vm_lookup(struct xe_file *xef, u32 id)
1596 {
1597         struct xe_vm *vm;
1598
1599         mutex_lock(&xef->vm.lock);
1600         vm = xa_load(&xef->vm.xa, id);
1601         if (vm)
1602                 xe_vm_get(vm);
1603         mutex_unlock(&xef->vm.lock);
1604
1605         return vm;
1606 }
1607
1608 u64 xe_vm_pdp4_descriptor(struct xe_vm *vm, struct xe_tile *tile)
1609 {
1610         return vm->pt_ops->pde_encode_bo(vm->pt_root[tile->id]->bo, 0,
1611                                          tile_to_xe(tile)->pat.idx[XE_CACHE_WB]);
1612 }
1613
1614 static struct xe_exec_queue *
1615 to_wait_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q)
1616 {
1617         return q ? q : vm->q[0];
1618 }
1619
1620 static struct dma_fence *
1621 xe_vm_unbind_vma(struct xe_vma *vma, struct xe_exec_queue *q,
1622                  struct xe_sync_entry *syncs, u32 num_syncs,
1623                  bool first_op, bool last_op)
1624 {
1625         struct xe_vm *vm = xe_vma_vm(vma);
1626         struct xe_exec_queue *wait_exec_queue = to_wait_exec_queue(vm, q);
1627         struct xe_tile *tile;
1628         struct dma_fence *fence = NULL;
1629         struct dma_fence **fences = NULL;
1630         struct dma_fence_array *cf = NULL;
1631         int cur_fence = 0, i;
1632         int number_tiles = hweight8(vma->tile_present);
1633         int err;
1634         u8 id;
1635
1636         trace_xe_vma_unbind(vma);
1637
1638         if (vma->ufence) {
1639                 struct xe_user_fence * const f = vma->ufence;
1640
1641                 if (!xe_sync_ufence_get_status(f))
1642                         return ERR_PTR(-EBUSY);
1643
1644                 vma->ufence = NULL;
1645                 xe_sync_ufence_put(f);
1646         }
1647
1648         if (number_tiles > 1) {
1649                 fences = kmalloc_array(number_tiles, sizeof(*fences),
1650                                        GFP_KERNEL);
1651                 if (!fences)
1652                         return ERR_PTR(-ENOMEM);
1653         }
1654
1655         for_each_tile(tile, vm->xe, id) {
1656                 if (!(vma->tile_present & BIT(id)))
1657                         goto next;
1658
1659                 fence = __xe_pt_unbind_vma(tile, vma, q ? q : vm->q[id],
1660                                            first_op ? syncs : NULL,
1661                                            first_op ? num_syncs : 0);
1662                 if (IS_ERR(fence)) {
1663                         err = PTR_ERR(fence);
1664                         goto err_fences;
1665                 }
1666
1667                 if (fences)
1668                         fences[cur_fence++] = fence;
1669
1670 next:
1671                 if (q && vm->pt_root[id] && !list_empty(&q->multi_gt_list))
1672                         q = list_next_entry(q, multi_gt_list);
1673         }
1674
1675         if (fences) {
1676                 cf = dma_fence_array_create(number_tiles, fences,
1677                                             vm->composite_fence_ctx,
1678                                             vm->composite_fence_seqno++,
1679                                             false);
1680                 if (!cf) {
1681                         --vm->composite_fence_seqno;
1682                         err = -ENOMEM;
1683                         goto err_fences;
1684                 }
1685         }
1686
1687         fence = cf ? &cf->base : !fence ?
1688                 xe_exec_queue_last_fence_get(wait_exec_queue, vm) : fence;
1689         if (last_op) {
1690                 for (i = 0; i < num_syncs; i++)
1691                         xe_sync_entry_signal(&syncs[i], NULL, fence);
1692         }
1693
1694         return fence;
1695
1696 err_fences:
1697         if (fences) {
1698                 while (cur_fence)
1699                         dma_fence_put(fences[--cur_fence]);
1700                 kfree(fences);
1701         }
1702
1703         return ERR_PTR(err);
1704 }
1705
1706 static struct dma_fence *
1707 xe_vm_bind_vma(struct xe_vma *vma, struct xe_exec_queue *q,
1708                struct xe_sync_entry *syncs, u32 num_syncs,
1709                bool first_op, bool last_op)
1710 {
1711         struct xe_tile *tile;
1712         struct dma_fence *fence;
1713         struct dma_fence **fences = NULL;
1714         struct dma_fence_array *cf = NULL;
1715         struct xe_vm *vm = xe_vma_vm(vma);
1716         int cur_fence = 0, i;
1717         int number_tiles = hweight8(vma->tile_mask);
1718         int err;
1719         u8 id;
1720
1721         trace_xe_vma_bind(vma);
1722
1723         if (number_tiles > 1) {
1724                 fences = kmalloc_array(number_tiles, sizeof(*fences),
1725                                        GFP_KERNEL);
1726                 if (!fences)
1727                         return ERR_PTR(-ENOMEM);
1728         }
1729
1730         for_each_tile(tile, vm->xe, id) {
1731                 if (!(vma->tile_mask & BIT(id)))
1732                         goto next;
1733
1734                 fence = __xe_pt_bind_vma(tile, vma, q ? q : vm->q[id],
1735                                          first_op ? syncs : NULL,
1736                                          first_op ? num_syncs : 0,
1737                                          vma->tile_present & BIT(id));
1738                 if (IS_ERR(fence)) {
1739                         err = PTR_ERR(fence);
1740                         goto err_fences;
1741                 }
1742
1743                 if (fences)
1744                         fences[cur_fence++] = fence;
1745
1746 next:
1747                 if (q && vm->pt_root[id] && !list_empty(&q->multi_gt_list))
1748                         q = list_next_entry(q, multi_gt_list);
1749         }
1750
1751         if (fences) {
1752                 cf = dma_fence_array_create(number_tiles, fences,
1753                                             vm->composite_fence_ctx,
1754                                             vm->composite_fence_seqno++,
1755                                             false);
1756                 if (!cf) {
1757                         --vm->composite_fence_seqno;
1758                         err = -ENOMEM;
1759                         goto err_fences;
1760                 }
1761         }
1762
1763         if (last_op) {
1764                 for (i = 0; i < num_syncs; i++)
1765                         xe_sync_entry_signal(&syncs[i], NULL,
1766                                              cf ? &cf->base : fence);
1767         }
1768
1769         return cf ? &cf->base : fence;
1770
1771 err_fences:
1772         if (fences) {
1773                 while (cur_fence)
1774                         dma_fence_put(fences[--cur_fence]);
1775                 kfree(fences);
1776         }
1777
1778         return ERR_PTR(err);
1779 }
1780
1781 static struct xe_user_fence *
1782 find_ufence_get(struct xe_sync_entry *syncs, u32 num_syncs)
1783 {
1784         unsigned int i;
1785
1786         for (i = 0; i < num_syncs; i++) {
1787                 struct xe_sync_entry *e = &syncs[i];
1788
1789                 if (xe_sync_is_ufence(e))
1790                         return xe_sync_ufence_get(e);
1791         }
1792
1793         return NULL;
1794 }
1795
1796 static int __xe_vm_bind(struct xe_vm *vm, struct xe_vma *vma,
1797                         struct xe_exec_queue *q, struct xe_sync_entry *syncs,
1798                         u32 num_syncs, bool immediate, bool first_op,
1799                         bool last_op)
1800 {
1801         struct dma_fence *fence;
1802         struct xe_exec_queue *wait_exec_queue = to_wait_exec_queue(vm, q);
1803         struct xe_user_fence *ufence;
1804
1805         xe_vm_assert_held(vm);
1806
1807         ufence = find_ufence_get(syncs, num_syncs);
1808         if (vma->ufence && ufence)
1809                 xe_sync_ufence_put(vma->ufence);
1810
1811         vma->ufence = ufence ?: vma->ufence;
1812
1813         if (immediate) {
1814                 fence = xe_vm_bind_vma(vma, q, syncs, num_syncs, first_op,
1815                                        last_op);
1816                 if (IS_ERR(fence))
1817                         return PTR_ERR(fence);
1818         } else {
1819                 int i;
1820
1821                 xe_assert(vm->xe, xe_vm_in_fault_mode(vm));
1822
1823                 fence = xe_exec_queue_last_fence_get(wait_exec_queue, vm);
1824                 if (last_op) {
1825                         for (i = 0; i < num_syncs; i++)
1826                                 xe_sync_entry_signal(&syncs[i], NULL, fence);
1827                 }
1828         }
1829
1830         if (last_op)
1831                 xe_exec_queue_last_fence_set(wait_exec_queue, vm, fence);
1832         dma_fence_put(fence);
1833
1834         return 0;
1835 }
1836
1837 static int xe_vm_bind(struct xe_vm *vm, struct xe_vma *vma, struct xe_exec_queue *q,
1838                       struct xe_bo *bo, struct xe_sync_entry *syncs,
1839                       u32 num_syncs, bool immediate, bool first_op,
1840                       bool last_op)
1841 {
1842         int err;
1843
1844         xe_vm_assert_held(vm);
1845         xe_bo_assert_held(bo);
1846
1847         if (bo && immediate) {
1848                 err = xe_bo_validate(bo, vm, true);
1849                 if (err)
1850                         return err;
1851         }
1852
1853         return __xe_vm_bind(vm, vma, q, syncs, num_syncs, immediate, first_op,
1854                             last_op);
1855 }
1856
1857 static int xe_vm_unbind(struct xe_vm *vm, struct xe_vma *vma,
1858                         struct xe_exec_queue *q, struct xe_sync_entry *syncs,
1859                         u32 num_syncs, bool first_op, bool last_op)
1860 {
1861         struct dma_fence *fence;
1862         struct xe_exec_queue *wait_exec_queue = to_wait_exec_queue(vm, q);
1863
1864         xe_vm_assert_held(vm);
1865         xe_bo_assert_held(xe_vma_bo(vma));
1866
1867         fence = xe_vm_unbind_vma(vma, q, syncs, num_syncs, first_op, last_op);
1868         if (IS_ERR(fence))
1869                 return PTR_ERR(fence);
1870
1871         xe_vma_destroy(vma, fence);
1872         if (last_op)
1873                 xe_exec_queue_last_fence_set(wait_exec_queue, vm, fence);
1874         dma_fence_put(fence);
1875
1876         return 0;
1877 }
1878
1879 #define ALL_DRM_XE_VM_CREATE_FLAGS (DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE | \
1880                                     DRM_XE_VM_CREATE_FLAG_LR_MODE | \
1881                                     DRM_XE_VM_CREATE_FLAG_FAULT_MODE)
1882
1883 int xe_vm_create_ioctl(struct drm_device *dev, void *data,
1884                        struct drm_file *file)
1885 {
1886         struct xe_device *xe = to_xe_device(dev);
1887         struct xe_file *xef = to_xe_file(file);
1888         struct drm_xe_vm_create *args = data;
1889         struct xe_tile *tile;
1890         struct xe_vm *vm;
1891         u32 id, asid;
1892         int err;
1893         u32 flags = 0;
1894
1895         if (XE_IOCTL_DBG(xe, args->extensions))
1896                 return -EINVAL;
1897
1898         if (XE_WA(xe_root_mmio_gt(xe), 14016763929))
1899                 args->flags |= DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE;
1900
1901         if (XE_IOCTL_DBG(xe, args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE &&
1902                          !xe->info.has_usm))
1903                 return -EINVAL;
1904
1905         if (XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
1906                 return -EINVAL;
1907
1908         if (XE_IOCTL_DBG(xe, args->flags & ~ALL_DRM_XE_VM_CREATE_FLAGS))
1909                 return -EINVAL;
1910
1911         if (XE_IOCTL_DBG(xe, args->flags & DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE &&
1912                          args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE))
1913                 return -EINVAL;
1914
1915         if (XE_IOCTL_DBG(xe, !(args->flags & DRM_XE_VM_CREATE_FLAG_LR_MODE) &&
1916                          args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE))
1917                 return -EINVAL;
1918
1919         if (XE_IOCTL_DBG(xe, args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE &&
1920                          xe_device_in_non_fault_mode(xe)))
1921                 return -EINVAL;
1922
1923         if (XE_IOCTL_DBG(xe, !(args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE) &&
1924                          xe_device_in_fault_mode(xe)))
1925                 return -EINVAL;
1926
1927         if (XE_IOCTL_DBG(xe, args->extensions))
1928                 return -EINVAL;
1929
1930         if (args->flags & DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE)
1931                 flags |= XE_VM_FLAG_SCRATCH_PAGE;
1932         if (args->flags & DRM_XE_VM_CREATE_FLAG_LR_MODE)
1933                 flags |= XE_VM_FLAG_LR_MODE;
1934         if (args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE)
1935                 flags |= XE_VM_FLAG_FAULT_MODE;
1936
1937         vm = xe_vm_create(xe, flags);
1938         if (IS_ERR(vm))
1939                 return PTR_ERR(vm);
1940
1941         mutex_lock(&xef->vm.lock);
1942         err = xa_alloc(&xef->vm.xa, &id, vm, xa_limit_32b, GFP_KERNEL);
1943         mutex_unlock(&xef->vm.lock);
1944         if (err)
1945                 goto err_close_and_put;
1946
1947         if (xe->info.has_asid) {
1948                 mutex_lock(&xe->usm.lock);
1949                 err = xa_alloc_cyclic(&xe->usm.asid_to_vm, &asid, vm,
1950                                       XA_LIMIT(1, XE_MAX_ASID - 1),
1951                                       &xe->usm.next_asid, GFP_KERNEL);
1952                 mutex_unlock(&xe->usm.lock);
1953                 if (err < 0)
1954                         goto err_free_id;
1955
1956                 vm->usm.asid = asid;
1957         }
1958
1959         args->vm_id = id;
1960         vm->xef = xef;
1961
1962         /* Record BO memory for VM pagetable created against client */
1963         for_each_tile(tile, xe, id)
1964                 if (vm->pt_root[id])
1965                         xe_drm_client_add_bo(vm->xef->client, vm->pt_root[id]->bo);
1966
1967 #if IS_ENABLED(CONFIG_DRM_XE_DEBUG_MEM)
1968         /* Warning: Security issue - never enable by default */
1969         args->reserved[0] = xe_bo_main_addr(vm->pt_root[0]->bo, XE_PAGE_SIZE);
1970 #endif
1971
1972         return 0;
1973
1974 err_free_id:
1975         mutex_lock(&xef->vm.lock);
1976         xa_erase(&xef->vm.xa, id);
1977         mutex_unlock(&xef->vm.lock);
1978 err_close_and_put:
1979         xe_vm_close_and_put(vm);
1980
1981         return err;
1982 }
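
/*
 * A minimal, hypothetical userspace sketch of the create ioctl above,
 * assuming the uapi names from <drm/xe_drm.h> and an already-open render
 * node fd (illustrative only, not part of the driver):
 *
 *	struct drm_xe_vm_create create = {
 *		.flags = DRM_XE_VM_CREATE_FLAG_LR_MODE,
 *	};
 *
 *	if (ioctl(fd, DRM_IOCTL_XE_VM_CREATE, &create))
 *		err(1, "vm create");
 *	// create.vm_id now identifies the VM for VM_BIND / VM_DESTROY.
 *
 * Per the checks above, FAULT_MODE additionally requires LR_MODE and is
 * mutually exclusive with SCRATCH_PAGE.
 */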
1983
1984 int xe_vm_destroy_ioctl(struct drm_device *dev, void *data,
1985                         struct drm_file *file)
1986 {
1987         struct xe_device *xe = to_xe_device(dev);
1988         struct xe_file *xef = to_xe_file(file);
1989         struct drm_xe_vm_destroy *args = data;
1990         struct xe_vm *vm;
1991         int err = 0;
1992
1993         if (XE_IOCTL_DBG(xe, args->pad) ||
1994             XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
1995                 return -EINVAL;
1996
1997         mutex_lock(&xef->vm.lock);
1998         vm = xa_load(&xef->vm.xa, args->vm_id);
1999         if (XE_IOCTL_DBG(xe, !vm))
2000                 err = -ENOENT;
2001         else if (XE_IOCTL_DBG(xe, vm->preempt.num_exec_queues))
2002                 err = -EBUSY;
2003         else
2004                 xa_erase(&xef->vm.xa, args->vm_id);
2005         mutex_unlock(&xef->vm.lock);
2006
2007         if (!err)
2008                 xe_vm_close_and_put(vm);
2009
2010         return err;
2011 }
2012
2013 static const u32 region_to_mem_type[] = {
2014         XE_PL_TT,
2015         XE_PL_VRAM0,
2016         XE_PL_VRAM1,
2017 };
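
/*
 * The region_to_mem_type table above maps the uapi
 * prefetch_mem_region_instance to a TTM placement: index 0 is the TT
 * (system memory) placement, indices 1 and 2 the VRAM instances. The
 * instance supplied by userspace is validated against
 * xe->info.mem_region_mask in vm_bind_ioctl_check_args().
 */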
2018
2019 static int xe_vm_prefetch(struct xe_vm *vm, struct xe_vma *vma,
2020                           struct xe_exec_queue *q, u32 region,
2021                           struct xe_sync_entry *syncs, u32 num_syncs,
2022                           bool first_op, bool last_op)
2023 {
2024         struct xe_exec_queue *wait_exec_queue = to_wait_exec_queue(vm, q);
2025         int err;
2026
2027         xe_assert(vm->xe, region < ARRAY_SIZE(region_to_mem_type));
2028
2029         if (!xe_vma_has_no_bo(vma)) {
2030                 err = xe_bo_migrate(xe_vma_bo(vma), region_to_mem_type[region]);
2031                 if (err)
2032                         return err;
2033         }
2034
2035         if (vma->tile_mask != (vma->tile_present & ~vma->tile_invalidated)) {
2036                 return xe_vm_bind(vm, vma, q, xe_vma_bo(vma), syncs, num_syncs,
2037                                   true, first_op, last_op);
2038         } else {
2039                 int i;
2040
2041                 /* Nothing to do, signal fences now */
2042                 if (last_op) {
2043                         for (i = 0; i < num_syncs; i++) {
2044                                 struct dma_fence *fence =
2045                                         xe_exec_queue_last_fence_get(wait_exec_queue, vm);
2046
2047                                 xe_sync_entry_signal(&syncs[i], NULL, fence);
2048                                 dma_fence_put(fence);
2049                         }
2050                 }
2051
2052                 return 0;
2053         }
2054 }
2055
2056 static void prep_vma_destroy(struct xe_vm *vm, struct xe_vma *vma,
2057                              bool post_commit)
2058 {
2059         down_read(&vm->userptr.notifier_lock);
2060         vma->gpuva.flags |= XE_VMA_DESTROYED;
2061         up_read(&vm->userptr.notifier_lock);
2062         if (post_commit)
2063                 xe_vm_remove_vma(vm, vma);
2064 }
2065
2066 #undef ULL
2067 #define ULL     unsigned long long
2068
2069 #if IS_ENABLED(CONFIG_DRM_XE_DEBUG_VM)
2070 static void print_op(struct xe_device *xe, struct drm_gpuva_op *op)
2071 {
2072         struct xe_vma *vma;
2073
2074         switch (op->op) {
2075         case DRM_GPUVA_OP_MAP:
2076                 vm_dbg(&xe->drm, "MAP: addr=0x%016llx, range=0x%016llx",
2077                        (ULL)op->map.va.addr, (ULL)op->map.va.range);
2078                 break;
2079         case DRM_GPUVA_OP_REMAP:
2080                 vma = gpuva_to_vma(op->remap.unmap->va);
2081                 vm_dbg(&xe->drm, "REMAP:UNMAP: addr=0x%016llx, range=0x%016llx, keep=%d",
2082                        (ULL)xe_vma_start(vma), (ULL)xe_vma_size(vma),
2083                        op->remap.unmap->keep ? 1 : 0);
2084                 if (op->remap.prev)
2085                         vm_dbg(&xe->drm,
2086                                "REMAP:PREV: addr=0x%016llx, range=0x%016llx",
2087                                (ULL)op->remap.prev->va.addr,
2088                                (ULL)op->remap.prev->va.range);
2089                 if (op->remap.next)
2090                         vm_dbg(&xe->drm,
2091                                "REMAP:NEXT: addr=0x%016llx, range=0x%016llx",
2092                                (ULL)op->remap.next->va.addr,
2093                                (ULL)op->remap.next->va.range);
2094                 break;
2095         case DRM_GPUVA_OP_UNMAP:
2096                 vma = gpuva_to_vma(op->unmap.va);
2097                 vm_dbg(&xe->drm, "UNMAP: addr=0x%016llx, range=0x%016llx, keep=%d",
2098                        (ULL)xe_vma_start(vma), (ULL)xe_vma_size(vma),
2099                        op->unmap.keep ? 1 : 0);
2100                 break;
2101         case DRM_GPUVA_OP_PREFETCH:
2102                 vma = gpuva_to_vma(op->prefetch.va);
2103                 vm_dbg(&xe->drm, "PREFETCH: addr=0x%016llx, range=0x%016llx",
2104                        (ULL)xe_vma_start(vma), (ULL)xe_vma_size(vma));
2105                 break;
2106         default:
2107                 drm_warn(&xe->drm, "NOT POSSIBLE");
2108         }
2109 }
2110 #else
2111 static void print_op(struct xe_device *xe, struct drm_gpuva_op *op)
2112 {
2113 }
2114 #endif
2115
2116 /*
2117  * Create the operations list from IOCTL arguments; set up operation fields so the
2118  * parse and commit steps are decoupled from the IOCTL arguments. This step can fail.
2119  */
2120 static struct drm_gpuva_ops *
2121 vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_bo *bo,
2122                          u64 bo_offset_or_userptr, u64 addr, u64 range,
2123                          u32 operation, u32 flags,
2124                          u32 prefetch_region, u16 pat_index)
2125 {
2126         struct drm_gem_object *obj = bo ? &bo->ttm.base : NULL;
2127         struct drm_gpuva_ops *ops;
2128         struct drm_gpuva_op *__op;
2129         struct drm_gpuvm_bo *vm_bo;
2130         int err;
2131
2132         lockdep_assert_held_write(&vm->lock);
2133
2134         vm_dbg(&vm->xe->drm,
2135                "op=%d, addr=0x%016llx, range=0x%016llx, bo_offset_or_userptr=0x%016llx",
2136                operation, (ULL)addr, (ULL)range,
2137                (ULL)bo_offset_or_userptr);
2138
2139         switch (operation) {
2140         case DRM_XE_VM_BIND_OP_MAP:
2141         case DRM_XE_VM_BIND_OP_MAP_USERPTR:
2142                 ops = drm_gpuvm_sm_map_ops_create(&vm->gpuvm, addr, range,
2143                                                   obj, bo_offset_or_userptr);
2144                 break;
2145         case DRM_XE_VM_BIND_OP_UNMAP:
2146                 ops = drm_gpuvm_sm_unmap_ops_create(&vm->gpuvm, addr, range);
2147                 break;
2148         case DRM_XE_VM_BIND_OP_PREFETCH:
2149                 ops = drm_gpuvm_prefetch_ops_create(&vm->gpuvm, addr, range);
2150                 break;
2151         case DRM_XE_VM_BIND_OP_UNMAP_ALL:
2152                 xe_assert(vm->xe, bo);
2153
2154                 err = xe_bo_lock(bo, true);
2155                 if (err)
2156                         return ERR_PTR(err);
2157
2158                 vm_bo = drm_gpuvm_bo_obtain(&vm->gpuvm, obj);
2159                 if (IS_ERR(vm_bo)) {
2160                         xe_bo_unlock(bo);
2161                         return ERR_CAST(vm_bo);
2162                 }
2163
2164                 ops = drm_gpuvm_bo_unmap_ops_create(vm_bo);
2165                 drm_gpuvm_bo_put(vm_bo);
2166                 xe_bo_unlock(bo);
2167                 break;
2168         default:
2169                 drm_warn(&vm->xe->drm, "NOT POSSIBLE");
2170                 ops = ERR_PTR(-EINVAL);
2171         }
2172         if (IS_ERR(ops))
2173                 return ops;
2174
2175         drm_gpuva_for_each_op(__op, ops) {
2176                 struct xe_vma_op *op = gpuva_op_to_vma_op(__op);
2177
2178                 if (__op->op == DRM_GPUVA_OP_MAP) {
2179                         op->map.is_null = flags & DRM_XE_VM_BIND_FLAG_NULL;
2180                         op->map.dumpable = flags & DRM_XE_VM_BIND_FLAG_DUMPABLE;
2181                         op->map.pat_index = pat_index;
2182                 } else if (__op->op == DRM_GPUVA_OP_PREFETCH) {
2183                         op->prefetch.region = prefetch_region;
2184                 }
2185
2186                 print_op(vm->xe, __op);
2187         }
2188
2189         return ops;
2190 }
2191
2192 static struct xe_vma *new_vma(struct xe_vm *vm, struct drm_gpuva_op_map *op,
2193                               u16 pat_index, unsigned int flags)
2194 {
2195         struct xe_bo *bo = op->gem.obj ? gem_to_xe_bo(op->gem.obj) : NULL;
2196         struct drm_exec exec;
2197         struct xe_vma *vma;
2198         int err;
2199
2200         lockdep_assert_held_write(&vm->lock);
2201
2202         if (bo) {
2203                 drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0);
2204                 drm_exec_until_all_locked(&exec) {
2205                         err = 0;
2206                         if (!bo->vm) {
2207                                 err = drm_exec_lock_obj(&exec, xe_vm_obj(vm));
2208                                 drm_exec_retry_on_contention(&exec);
2209                         }
2210                         if (!err) {
2211                                 err = drm_exec_lock_obj(&exec, &bo->ttm.base);
2212                                 drm_exec_retry_on_contention(&exec);
2213                         }
2214                         if (err) {
2215                                 drm_exec_fini(&exec);
2216                                 return ERR_PTR(err);
2217                         }
2218                 }
2219         }
2220         vma = xe_vma_create(vm, bo, op->gem.offset,
2221                             op->va.addr, op->va.addr +
2222                             op->va.range - 1, pat_index, flags);
2223         if (bo)
2224                 drm_exec_fini(&exec);
2225
2226         if (xe_vma_is_userptr(vma)) {
2227                 err = xe_vma_userptr_pin_pages(to_userptr_vma(vma));
2228                 if (err) {
2229                         prep_vma_destroy(vm, vma, false);
2230                         xe_vma_destroy_unlocked(vma);
2231                         return ERR_PTR(err);
2232                 }
2233         } else if (!xe_vma_has_no_bo(vma) && !bo->vm) {
2234                 err = add_preempt_fences(vm, bo);
2235                 if (err) {
2236                         prep_vma_destroy(vm, vma, false);
2237                         xe_vma_destroy_unlocked(vma);
2238                         return ERR_PTR(err);
2239                 }
2240         }
2241
2242         return vma;
2243 }
2244
2245 static u64 xe_vma_max_pte_size(struct xe_vma *vma)
2246 {
2247         if (vma->gpuva.flags & XE_VMA_PTE_1G)
2248                 return SZ_1G;
2249         else if (vma->gpuva.flags & (XE_VMA_PTE_2M | XE_VMA_PTE_COMPACT))
2250                 return SZ_2M;
2251         else if (vma->gpuva.flags & XE_VMA_PTE_64K)
2252                 return SZ_64K;
2253         else if (vma->gpuva.flags & XE_VMA_PTE_4K)
2254                 return SZ_4K;
2255
2256         return SZ_1G;   /* Uninitialized, use max size */
2257 }
2258
2259 static void xe_vma_set_pte_size(struct xe_vma *vma, u64 size)
2260 {
2261         switch (size) {
2262         case SZ_1G:
2263                 vma->gpuva.flags |= XE_VMA_PTE_1G;
2264                 break;
2265         case SZ_2M:
2266                 vma->gpuva.flags |= XE_VMA_PTE_2M;
2267                 break;
2268         case SZ_64K:
2269                 vma->gpuva.flags |= XE_VMA_PTE_64K;
2270                 break;
2271         case SZ_4K:
2272                 vma->gpuva.flags |= XE_VMA_PTE_4K;
2273                 break;
2274         }
2275 }
2276
2277 static int xe_vma_op_commit(struct xe_vm *vm, struct xe_vma_op *op)
2278 {
2279         int err = 0;
2280
2281         lockdep_assert_held_write(&vm->lock);
2282
2283         switch (op->base.op) {
2284         case DRM_GPUVA_OP_MAP:
2285                 err |= xe_vm_insert_vma(vm, op->map.vma);
2286                 if (!err)
2287                         op->flags |= XE_VMA_OP_COMMITTED;
2288                 break;
2289         case DRM_GPUVA_OP_REMAP:
2290         {
2291                 u8 tile_present =
2292                         gpuva_to_vma(op->base.remap.unmap->va)->tile_present;
2293
2294                 prep_vma_destroy(vm, gpuva_to_vma(op->base.remap.unmap->va),
2295                                  true);
2296                 op->flags |= XE_VMA_OP_COMMITTED;
2297
2298                 if (op->remap.prev) {
2299                         err |= xe_vm_insert_vma(vm, op->remap.prev);
2300                         if (!err)
2301                                 op->flags |= XE_VMA_OP_PREV_COMMITTED;
2302                         if (!err && op->remap.skip_prev) {
2303                                 op->remap.prev->tile_present =
2304                                         tile_present;
2305                                 op->remap.prev = NULL;
2306                         }
2307                 }
2308                 if (op->remap.next) {
2309                         err |= xe_vm_insert_vma(vm, op->remap.next);
2310                         if (!err)
2311                                 op->flags |= XE_VMA_OP_NEXT_COMMITTED;
2312                         if (!err && op->remap.skip_next) {
2313                                 op->remap.next->tile_present =
2314                                         tile_present;
2315                                 op->remap.next = NULL;
2316                         }
2317                 }
2318
2319                 /* Adjust for partial unbind after removing VMA from VM */
2320                 if (!err) {
2321                         op->base.remap.unmap->va->va.addr = op->remap.start;
2322                         op->base.remap.unmap->va->va.range = op->remap.range;
2323                 }
2324                 break;
2325         }
2326         case DRM_GPUVA_OP_UNMAP:
2327                 prep_vma_destroy(vm, gpuva_to_vma(op->base.unmap.va), true);
2328                 op->flags |= XE_VMA_OP_COMMITTED;
2329                 break;
2330         case DRM_GPUVA_OP_PREFETCH:
2331                 op->flags |= XE_VMA_OP_COMMITTED;
2332                 break;
2333         default:
2334                 drm_warn(&vm->xe->drm, "NOT POSSIBLE");
2335         }
2336
2337         return err;
2338 }
2339
2340
2341 static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct xe_exec_queue *q,
2342                                    struct drm_gpuva_ops *ops,
2343                                    struct xe_sync_entry *syncs, u32 num_syncs,
2344                                    struct list_head *ops_list, bool last)
2345 {
2346         struct xe_device *xe = vm->xe;
2347         struct xe_vma_op *last_op = NULL;
2348         struct drm_gpuva_op *__op;
2349         int err = 0;
2350
2351         lockdep_assert_held_write(&vm->lock);
2352
2353         drm_gpuva_for_each_op(__op, ops) {
2354                 struct xe_vma_op *op = gpuva_op_to_vma_op(__op);
2355                 struct xe_vma *vma;
2356                 bool first = list_empty(ops_list);
2357                 unsigned int flags = 0;
2358
2359                 INIT_LIST_HEAD(&op->link);
2360                 list_add_tail(&op->link, ops_list);
2361
2362                 if (first) {
2363                         op->flags |= XE_VMA_OP_FIRST;
2364                         op->num_syncs = num_syncs;
2365                         op->syncs = syncs;
2366                 }
2367
2368                 op->q = q;
2369
2370                 switch (op->base.op) {
2371                 case DRM_GPUVA_OP_MAP:
2372                 {
2373                         flags |= op->map.is_null ?
2374                                 VMA_CREATE_FLAG_IS_NULL : 0;
2375                         flags |= op->map.dumpable ?
2376                                 VMA_CREATE_FLAG_DUMPABLE : 0;
2377
2378                         vma = new_vma(vm, &op->base.map, op->map.pat_index,
2379                                       flags);
2380                         if (IS_ERR(vma))
2381                                 return PTR_ERR(vma);
2382
2383                         op->map.vma = vma;
2384                         break;
2385                 }
2386                 case DRM_GPUVA_OP_REMAP:
2387                 {
2388                         struct xe_vma *old =
2389                                 gpuva_to_vma(op->base.remap.unmap->va);
2390
2391                         op->remap.start = xe_vma_start(old);
2392                         op->remap.range = xe_vma_size(old);
2393
2394                         if (op->base.remap.prev) {
2395                                 flags |= op->base.remap.unmap->va->flags &
2396                                         XE_VMA_READ_ONLY ?
2397                                         VMA_CREATE_FLAG_READ_ONLY : 0;
2398                                 flags |= op->base.remap.unmap->va->flags &
2399                                         DRM_GPUVA_SPARSE ?
2400                                         VMA_CREATE_FLAG_IS_NULL : 0;
2401                                 flags |= op->base.remap.unmap->va->flags &
2402                                         XE_VMA_DUMPABLE ?
2403                                         VMA_CREATE_FLAG_DUMPABLE : 0;
2404
2405                                 vma = new_vma(vm, op->base.remap.prev,
2406                                               old->pat_index, flags);
2407                                 if (IS_ERR(vma))
2408                                         return PTR_ERR(vma);
2409
2410                                 op->remap.prev = vma;
2411
2412                                 /*
2413                                  * Userptr creates a new SG mapping so
2414                                  * we must also rebind.
2415                                  */
2416                                 op->remap.skip_prev = !xe_vma_is_userptr(old) &&
2417                                         IS_ALIGNED(xe_vma_end(vma),
2418                                                    xe_vma_max_pte_size(old));
2419                                 if (op->remap.skip_prev) {
2420                                         xe_vma_set_pte_size(vma, xe_vma_max_pte_size(old));
2421                                         op->remap.range -=
2422                                                 xe_vma_end(vma) -
2423                                                 xe_vma_start(old);
2424                                         op->remap.start = xe_vma_end(vma);
2425                                         vm_dbg(&xe->drm, "REMAP:SKIP_PREV: addr=0x%016llx, range=0x%016llx",
2426                                                (ULL)op->remap.start,
2427                                                (ULL)op->remap.range);
2428                                 }
2429                         }
2430
2431                         if (op->base.remap.next) {
2432                                 flags |= op->base.remap.unmap->va->flags &
2433                                         XE_VMA_READ_ONLY ?
2434                                         VMA_CREATE_FLAG_READ_ONLY : 0;
2435                                 flags |= op->base.remap.unmap->va->flags &
2436                                         DRM_GPUVA_SPARSE ?
2437                                         VMA_CREATE_FLAG_IS_NULL : 0;
2438                                 flags |= op->base.remap.unmap->va->flags &
2439                                         XE_VMA_DUMPABLE ?
2440                                         VMA_CREATE_FLAG_DUMPABLE : 0;
2441
2442                                 vma = new_vma(vm, op->base.remap.next,
2443                                               old->pat_index, flags);
2444                                 if (IS_ERR(vma))
2445                                         return PTR_ERR(vma);
2446
2447                                 op->remap.next = vma;
2448
2449                                 /*
2450                                  * Userptr creates a new SG mapping so
2451                                  * we must also rebind.
2452                                  */
2453                                 op->remap.skip_next = !xe_vma_is_userptr(old) &&
2454                                         IS_ALIGNED(xe_vma_start(vma),
2455                                                    xe_vma_max_pte_size(old));
2456                                 if (op->remap.skip_next) {
2457                                         xe_vma_set_pte_size(vma, xe_vma_max_pte_size(old));
2458                                         op->remap.range -=
2459                                                 xe_vma_end(old) -
2460                                                 xe_vma_start(vma);
2461                                         vm_dbg(&xe->drm, "REMAP:SKIP_NEXT: addr=0x%016llx, range=0x%016llx",
2462                                                (ULL)op->remap.start,
2463                                                (ULL)op->remap.range);
2464                                 }
2465                         }
2466                         break;
2467                 }
2468                 case DRM_GPUVA_OP_UNMAP:
2469                 case DRM_GPUVA_OP_PREFETCH:
2470                         /* Nothing to do */
2471                         break;
2472                 default:
2473                         drm_warn(&vm->xe->drm, "NOT POSSIBLE");
2474                 }
2475
2476                 last_op = op;
2477
2478                 err = xe_vma_op_commit(vm, op);
2479                 if (err)
2480                         return err;
2481         }
2482
2483         /* FIXME: Unhandled corner case */
2484         XE_WARN_ON(!last_op && last && !list_empty(ops_list));
2485
2486         if (!last_op)
2487                 return 0;
2488
2489         last_op->ops = ops;
2490         if (last) {
2491                 last_op->flags |= XE_VMA_OP_LAST;
2492                 last_op->num_syncs = num_syncs;
2493                 last_op->syncs = syncs;
2494         }
2495
2496         return 0;
2497 }
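
/*
 * Summary of the bind flow driven from xe_vm_bind_ioctl():
 *
 *   1. vm_bind_ioctl_ops_create()  - translate a single bind op from the
 *      IOCTL into a drm_gpuva_ops list; no VM state is modified yet.
 *   2. vm_bind_ioctl_ops_parse()   - create VMAs for the MAP/REMAP pieces
 *      and commit them into the GPUVA tree via xe_vma_op_commit().
 *   3. vm_bind_ioctl_ops_execute() - issue the actual (un)bind jobs; on
 *      failure the VM is killed rather than unwound (see the FIXME there).
 *
 * If an earlier step fails part-way, vm_bind_ioctl_ops_unwind() walks the
 * already-committed operations in reverse and restores the GPUVA tree.
 */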
2498
2499 static int op_execute(struct drm_exec *exec, struct xe_vm *vm,
2500                       struct xe_vma *vma, struct xe_vma_op *op)
2501 {
2502         int err;
2503
2504         lockdep_assert_held_write(&vm->lock);
2505
2506         err = xe_vm_prepare_vma(exec, vma, 1);
2507         if (err)
2508                 return err;
2509
2510         xe_vm_assert_held(vm);
2511         xe_bo_assert_held(xe_vma_bo(vma));
2512
2513         switch (op->base.op) {
2514         case DRM_GPUVA_OP_MAP:
2515                 err = xe_vm_bind(vm, vma, op->q, xe_vma_bo(vma),
2516                                  op->syncs, op->num_syncs,
2517                                  !xe_vm_in_fault_mode(vm),
2518                                  op->flags & XE_VMA_OP_FIRST,
2519                                  op->flags & XE_VMA_OP_LAST);
2520                 break;
2521         case DRM_GPUVA_OP_REMAP:
2522         {
2523                 bool prev = !!op->remap.prev;
2524                 bool next = !!op->remap.next;
2525
2526                 if (!op->remap.unmap_done) {
2527                         if (prev || next)
2528                                 vma->gpuva.flags |= XE_VMA_FIRST_REBIND;
2529                         err = xe_vm_unbind(vm, vma, op->q, op->syncs,
2530                                            op->num_syncs,
2531                                            op->flags & XE_VMA_OP_FIRST,
2532                                            op->flags & XE_VMA_OP_LAST &&
2533                                            !prev && !next);
2534                         if (err)
2535                                 break;
2536                         op->remap.unmap_done = true;
2537                 }
2538
2539                 if (prev) {
2540                         op->remap.prev->gpuva.flags |= XE_VMA_LAST_REBIND;
2541                         err = xe_vm_bind(vm, op->remap.prev, op->q,
2542                                          xe_vma_bo(op->remap.prev), op->syncs,
2543                                          op->num_syncs, true, false,
2544                                          op->flags & XE_VMA_OP_LAST && !next);
2545                         op->remap.prev->gpuva.flags &= ~XE_VMA_LAST_REBIND;
2546                         if (err)
2547                                 break;
2548                         op->remap.prev = NULL;
2549                 }
2550
2551                 if (next) {
2552                         op->remap.next->gpuva.flags |= XE_VMA_LAST_REBIND;
2553                         err = xe_vm_bind(vm, op->remap.next, op->q,
2554                                          xe_vma_bo(op->remap.next),
2555                                          op->syncs, op->num_syncs,
2556                                          true, false,
2557                                          op->flags & XE_VMA_OP_LAST);
2558                         op->remap.next->gpuva.flags &= ~XE_VMA_LAST_REBIND;
2559                         if (err)
2560                                 break;
2561                         op->remap.next = NULL;
2562                 }
2563
2564                 break;
2565         }
2566         case DRM_GPUVA_OP_UNMAP:
2567                 err = xe_vm_unbind(vm, vma, op->q, op->syncs,
2568                                    op->num_syncs, op->flags & XE_VMA_OP_FIRST,
2569                                    op->flags & XE_VMA_OP_LAST);
2570                 break;
2571         case DRM_GPUVA_OP_PREFETCH:
2572                 err = xe_vm_prefetch(vm, vma, op->q, op->prefetch.region,
2573                                      op->syncs, op->num_syncs,
2574                                      op->flags & XE_VMA_OP_FIRST,
2575                                      op->flags & XE_VMA_OP_LAST);
2576                 break;
2577         default:
2578                 drm_warn(&vm->xe->drm, "NOT POSSIBLE");
2579         }
2580
2581         if (err)
2582                 trace_xe_vma_fail(vma);
2583
2584         return err;
2585 }
2586
2587 static int __xe_vma_op_execute(struct xe_vm *vm, struct xe_vma *vma,
2588                                struct xe_vma_op *op)
2589 {
2590         struct drm_exec exec;
2591         int err;
2592
2593 retry_userptr:
2594         drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0);
2595         drm_exec_until_all_locked(&exec) {
2596                 err = op_execute(&exec, vm, vma, op);
2597                 drm_exec_retry_on_contention(&exec);
2598                 if (err)
2599                         break;
2600         }
2601         drm_exec_fini(&exec);
2602
2603         if (err == -EAGAIN) {
2604                 lockdep_assert_held_write(&vm->lock);
2605
2606                 if (op->base.op == DRM_GPUVA_OP_REMAP) {
2607                         if (!op->remap.unmap_done)
2608                                 vma = gpuva_to_vma(op->base.remap.unmap->va);
2609                         else if (op->remap.prev)
2610                                 vma = op->remap.prev;
2611                         else
2612                                 vma = op->remap.next;
2613                 }
2614
2615                 if (xe_vma_is_userptr(vma)) {
2616                         err = xe_vma_userptr_pin_pages(to_userptr_vma(vma));
2617                         if (!err)
2618                                 goto retry_userptr;
2619
2620                         trace_xe_vma_fail(vma);
2621                 }
2622         }
2623
2624         return err;
2625 }
2626
2627 static int xe_vma_op_execute(struct xe_vm *vm, struct xe_vma_op *op)
2628 {
2629         int ret = 0;
2630
2631         lockdep_assert_held_write(&vm->lock);
2632
2633         switch (op->base.op) {
2634         case DRM_GPUVA_OP_MAP:
2635                 ret = __xe_vma_op_execute(vm, op->map.vma, op);
2636                 break;
2637         case DRM_GPUVA_OP_REMAP:
2638         {
2639                 struct xe_vma *vma;
2640
2641                 if (!op->remap.unmap_done)
2642                         vma = gpuva_to_vma(op->base.remap.unmap->va);
2643                 else if (op->remap.prev)
2644                         vma = op->remap.prev;
2645                 else
2646                         vma = op->remap.next;
2647
2648                 ret = __xe_vma_op_execute(vm, vma, op);
2649                 break;
2650         }
2651         case DRM_GPUVA_OP_UNMAP:
2652                 ret = __xe_vma_op_execute(vm, gpuva_to_vma(op->base.unmap.va),
2653                                           op);
2654                 break;
2655         case DRM_GPUVA_OP_PREFETCH:
2656                 ret = __xe_vma_op_execute(vm,
2657                                           gpuva_to_vma(op->base.prefetch.va),
2658                                           op);
2659                 break;
2660         default:
2661                 drm_warn(&vm->xe->drm, "NOT POSSIBLE");
2662         }
2663
2664         return ret;
2665 }
2666
2667 static void xe_vma_op_cleanup(struct xe_vm *vm, struct xe_vma_op *op)
2668 {
2669         bool last = op->flags & XE_VMA_OP_LAST;
2670
2671         if (last) {
2672                 while (op->num_syncs--)
2673                         xe_sync_entry_cleanup(&op->syncs[op->num_syncs]);
2674                 kfree(op->syncs);
2675                 if (op->q)
2676                         xe_exec_queue_put(op->q);
2677         }
2678         if (!list_empty(&op->link))
2679                 list_del(&op->link);
2680         if (op->ops)
2681                 drm_gpuva_ops_free(&vm->gpuvm, op->ops);
2682         if (last)
2683                 xe_vm_put(vm);
2684 }
2685
2686 static void xe_vma_op_unwind(struct xe_vm *vm, struct xe_vma_op *op,
2687                              bool post_commit, bool prev_post_commit,
2688                              bool next_post_commit)
2689 {
2690         lockdep_assert_held_write(&vm->lock);
2691
2692         switch (op->base.op) {
2693         case DRM_GPUVA_OP_MAP:
2694                 if (op->map.vma) {
2695                         prep_vma_destroy(vm, op->map.vma, post_commit);
2696                         xe_vma_destroy_unlocked(op->map.vma);
2697                 }
2698                 break;
2699         case DRM_GPUVA_OP_UNMAP:
2700         {
2701                 struct xe_vma *vma = gpuva_to_vma(op->base.unmap.va);
2702
2703                 if (vma) {
2704                         down_read(&vm->userptr.notifier_lock);
2705                         vma->gpuva.flags &= ~XE_VMA_DESTROYED;
2706                         up_read(&vm->userptr.notifier_lock);
2707                         if (post_commit)
2708                                 xe_vm_insert_vma(vm, vma);
2709                 }
2710                 break;
2711         }
2712         case DRM_GPUVA_OP_REMAP:
2713         {
2714                 struct xe_vma *vma = gpuva_to_vma(op->base.remap.unmap->va);
2715
2716                 if (op->remap.prev) {
2717                         prep_vma_destroy(vm, op->remap.prev, prev_post_commit);
2718                         xe_vma_destroy_unlocked(op->remap.prev);
2719                 }
2720                 if (op->remap.next) {
2721                         prep_vma_destroy(vm, op->remap.next, next_post_commit);
2722                         xe_vma_destroy_unlocked(op->remap.next);
2723                 }
2724                 if (vma) {
2725                         down_read(&vm->userptr.notifier_lock);
2726                         vma->gpuva.flags &= ~XE_VMA_DESTROYED;
2727                         up_read(&vm->userptr.notifier_lock);
2728                         if (post_commit)
2729                                 xe_vm_insert_vma(vm, vma);
2730                 }
2731                 break;
2732         }
2733         case DRM_GPUVA_OP_PREFETCH:
2734                 /* Nothing to do */
2735                 break;
2736         default:
2737                 drm_warn(&vm->xe->drm, "NOT POSSIBLE");
2738         }
2739 }
2740
2741 static void vm_bind_ioctl_ops_unwind(struct xe_vm *vm,
2742                                      struct drm_gpuva_ops **ops,
2743                                      int num_ops_list)
2744 {
2745         int i;
2746
2747         for (i = num_ops_list - 1; i >= 0; --i) {
2748                 struct drm_gpuva_ops *__ops = ops[i];
2749                 struct drm_gpuva_op *__op;
2750
2751                 if (!__ops)
2752                         continue;
2753
2754                 drm_gpuva_for_each_op_reverse(__op, __ops) {
2755                         struct xe_vma_op *op = gpuva_op_to_vma_op(__op);
2756
2757                         xe_vma_op_unwind(vm, op,
2758                                          op->flags & XE_VMA_OP_COMMITTED,
2759                                          op->flags & XE_VMA_OP_PREV_COMMITTED,
2760                                          op->flags & XE_VMA_OP_NEXT_COMMITTED);
2761                 }
2762
2763                 drm_gpuva_ops_free(&vm->gpuvm, __ops);
2764         }
2765 }
2766
2767 static int vm_bind_ioctl_ops_execute(struct xe_vm *vm,
2768                                      struct list_head *ops_list)
2769 {
2770         struct xe_vma_op *op, *next;
2771         int err;
2772
2773         lockdep_assert_held_write(&vm->lock);
2774
2775         list_for_each_entry_safe(op, next, ops_list, link) {
2776                 err = xe_vma_op_execute(vm, op);
2777                 if (err) {
2778                         drm_warn(&vm->xe->drm, "VM op(%d) failed with %d",
2779                                  op->base.op, err);
2780                         /*
2781                          * FIXME: Killing VM rather than proper error handling
2782                          */
2783                         xe_vm_kill(vm);
2784                         return -ENOSPC;
2785                 }
2786                 xe_vma_op_cleanup(vm, op);
2787         }
2788
2789         return 0;
2790 }
2791
2792 #define SUPPORTED_FLAGS (DRM_XE_VM_BIND_FLAG_NULL | \
2793          DRM_XE_VM_BIND_FLAG_DUMPABLE)
2794 #define XE_64K_PAGE_MASK 0xffffull
2795 #define ALL_DRM_XE_SYNCS_FLAGS (DRM_XE_SYNCS_FLAG_WAIT_FOR_OP)
2796
2797 static int vm_bind_ioctl_check_args(struct xe_device *xe,
2798                                     struct drm_xe_vm_bind *args,
2799                                     struct drm_xe_vm_bind_op **bind_ops)
2800 {
2801         int err;
2802         int i;
2803
2804         if (XE_IOCTL_DBG(xe, args->pad || args->pad2) ||
2805             XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
2806                 return -EINVAL;
2807
2808         if (XE_IOCTL_DBG(xe, args->extensions))
2809                 return -EINVAL;
2810
2811         if (args->num_binds > 1) {
2812                 u64 __user *bind_user =
2813                         u64_to_user_ptr(args->vector_of_binds);
2814
2815                 *bind_ops = kvmalloc_array(args->num_binds,
2816                                            sizeof(struct drm_xe_vm_bind_op),
2817                                            GFP_KERNEL | __GFP_ACCOUNT);
2818                 if (!*bind_ops)
2819                         return -ENOMEM;
2820
2821                 err = __copy_from_user(*bind_ops, bind_user,
2822                                        sizeof(struct drm_xe_vm_bind_op) *
2823                                        args->num_binds);
2824                 if (XE_IOCTL_DBG(xe, err)) {
2825                         err = -EFAULT;
2826                         goto free_bind_ops;
2827                 }
2828         } else {
2829                 *bind_ops = &args->bind;
2830         }
2831
2832         for (i = 0; i < args->num_binds; ++i) {
2833                 u64 range = (*bind_ops)[i].range;
2834                 u64 addr = (*bind_ops)[i].addr;
2835                 u32 op = (*bind_ops)[i].op;
2836                 u32 flags = (*bind_ops)[i].flags;
2837                 u32 obj = (*bind_ops)[i].obj;
2838                 u64 obj_offset = (*bind_ops)[i].obj_offset;
2839                 u32 prefetch_region = (*bind_ops)[i].prefetch_mem_region_instance;
2840                 bool is_null = flags & DRM_XE_VM_BIND_FLAG_NULL;
2841                 u16 pat_index = (*bind_ops)[i].pat_index;
2842                 u16 coh_mode;
2843
2844                 if (XE_IOCTL_DBG(xe, pat_index >= xe->pat.n_entries)) {
2845                         err = -EINVAL;
2846                         goto free_bind_ops;
2847                 }
2848
2849                 pat_index = array_index_nospec(pat_index, xe->pat.n_entries);
2850                 (*bind_ops)[i].pat_index = pat_index;
2851                 coh_mode = xe_pat_index_get_coh_mode(xe, pat_index);
2852                 if (XE_IOCTL_DBG(xe, !coh_mode)) { /* hw reserved */
2853                         err = -EINVAL;
2854                         goto free_bind_ops;
2855                 }
2856
2857                 if (XE_WARN_ON(coh_mode > XE_COH_AT_LEAST_1WAY)) {
2858                         err = -EINVAL;
2859                         goto free_bind_ops;
2860                 }
2861
2862                 if (XE_IOCTL_DBG(xe, op > DRM_XE_VM_BIND_OP_PREFETCH) ||
2863                     XE_IOCTL_DBG(xe, flags & ~SUPPORTED_FLAGS) ||
2864                     XE_IOCTL_DBG(xe, obj && is_null) ||
2865                     XE_IOCTL_DBG(xe, obj_offset && is_null) ||
2866                     XE_IOCTL_DBG(xe, op != DRM_XE_VM_BIND_OP_MAP &&
2867                                  is_null) ||
2868                     XE_IOCTL_DBG(xe, !obj &&
2869                                  op == DRM_XE_VM_BIND_OP_MAP &&
2870                                  !is_null) ||
2871                     XE_IOCTL_DBG(xe, !obj &&
2872                                  op == DRM_XE_VM_BIND_OP_UNMAP_ALL) ||
2873                     XE_IOCTL_DBG(xe, addr &&
2874                                  op == DRM_XE_VM_BIND_OP_UNMAP_ALL) ||
2875                     XE_IOCTL_DBG(xe, range &&
2876                                  op == DRM_XE_VM_BIND_OP_UNMAP_ALL) ||
2877                     XE_IOCTL_DBG(xe, obj &&
2878                                  op == DRM_XE_VM_BIND_OP_MAP_USERPTR) ||
2879                     XE_IOCTL_DBG(xe, coh_mode == XE_COH_NONE &&
2880                                  op == DRM_XE_VM_BIND_OP_MAP_USERPTR) ||
2881                     XE_IOCTL_DBG(xe, obj &&
2882                                  op == DRM_XE_VM_BIND_OP_PREFETCH) ||
2883                     XE_IOCTL_DBG(xe, prefetch_region &&
2884                                  op != DRM_XE_VM_BIND_OP_PREFETCH) ||
2885                     XE_IOCTL_DBG(xe, !(BIT(prefetch_region) &
2886                                        xe->info.mem_region_mask)) ||
2887                     XE_IOCTL_DBG(xe, obj &&
2888                                  op == DRM_XE_VM_BIND_OP_UNMAP)) {
2889                         err = -EINVAL;
2890                         goto free_bind_ops;
2891                 }
2892
2893                 if (XE_IOCTL_DBG(xe, obj_offset & ~PAGE_MASK) ||
2894                     XE_IOCTL_DBG(xe, addr & ~PAGE_MASK) ||
2895                     XE_IOCTL_DBG(xe, range & ~PAGE_MASK) ||
2896                     XE_IOCTL_DBG(xe, !range &&
2897                                  op != DRM_XE_VM_BIND_OP_UNMAP_ALL)) {
2898                         err = -EINVAL;
2899                         goto free_bind_ops;
2900                 }
2901         }
2902
2903         return 0;
2904
2905 free_bind_ops:
2906         if (args->num_binds > 1)
2907                 kvfree(*bind_ops);
2908         return err;
2909 }
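
/*
 * A minimal, hypothetical bind that satisfies the checks above (page-aligned
 * addr/range, a GEM object for OP_MAP, a valid pat_index), assuming the uapi
 * definitions from <drm/xe_drm.h> (illustrative only):
 *
 *	struct drm_xe_vm_bind bind = {
 *		.vm_id = vm_id,
 *		.num_binds = 1,
 *		.bind = {
 *			.obj = bo_handle,
 *			.obj_offset = 0,
 *			.addr = 0x1a0000,
 *			.range = 0x10000,
 *			.op = DRM_XE_VM_BIND_OP_MAP,
 *			.pat_index = pat_index,
 *		},
 *	};
 *
 *	if (ioctl(fd, DRM_IOCTL_XE_VM_BIND, &bind))
 *		err(1, "vm bind");
 */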
2910
2911 static int vm_bind_ioctl_signal_fences(struct xe_vm *vm,
2912                                        struct xe_exec_queue *q,
2913                                        struct xe_sync_entry *syncs,
2914                                        int num_syncs)
2915 {
2916         struct dma_fence *fence;
2917         int i, err = 0;
2918
2919         fence = xe_sync_in_fence_get(syncs, num_syncs,
2920                                      to_wait_exec_queue(vm, q), vm);
2921         if (IS_ERR(fence))
2922                 return PTR_ERR(fence);
2923
2924         for (i = 0; i < num_syncs; i++)
2925                 xe_sync_entry_signal(&syncs[i], NULL, fence);
2926
2927         xe_exec_queue_last_fence_set(to_wait_exec_queue(vm, q), vm,
2928                                      fence);
2929         dma_fence_put(fence);
2930
2931         return err;
2932 }
2933
2934 int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
2935 {
2936         struct xe_device *xe = to_xe_device(dev);
2937         struct xe_file *xef = to_xe_file(file);
2938         struct drm_xe_vm_bind *args = data;
2939         struct drm_xe_sync __user *syncs_user;
2940         struct xe_bo **bos = NULL;
2941         struct drm_gpuva_ops **ops = NULL;
2942         struct xe_vm *vm;
2943         struct xe_exec_queue *q = NULL;
2944         u32 num_syncs, num_ufence = 0;
2945         struct xe_sync_entry *syncs = NULL;
2946         struct drm_xe_vm_bind_op *bind_ops;
2947         LIST_HEAD(ops_list);
2948         int err;
2949         int i;
2950
2951         err = vm_bind_ioctl_check_args(xe, args, &bind_ops);
2952         if (err)
2953                 return err;
2954
2955         if (args->exec_queue_id) {
2956                 q = xe_exec_queue_lookup(xef, args->exec_queue_id);
2957                 if (XE_IOCTL_DBG(xe, !q)) {
2958                         err = -ENOENT;
2959                         goto free_objs;
2960                 }
2961
2962                 if (XE_IOCTL_DBG(xe, !(q->flags & EXEC_QUEUE_FLAG_VM))) {
2963                         err = -EINVAL;
2964                         goto put_exec_queue;
2965                 }
2966         }
2967
2968         vm = xe_vm_lookup(xef, args->vm_id);
2969         if (XE_IOCTL_DBG(xe, !vm)) {
2970                 err = -EINVAL;
2971                 goto put_exec_queue;
2972         }
2973
2974         err = down_write_killable(&vm->lock);
2975         if (err)
2976                 goto put_vm;
2977
2978         if (XE_IOCTL_DBG(xe, xe_vm_is_closed_or_banned(vm))) {
2979                 err = -ENOENT;
2980                 goto release_vm_lock;
2981         }
2982
2983         for (i = 0; i < args->num_binds; ++i) {
2984                 u64 range = bind_ops[i].range;
2985                 u64 addr = bind_ops[i].addr;
2986
2987                 if (XE_IOCTL_DBG(xe, range > vm->size) ||
2988                     XE_IOCTL_DBG(xe, addr > vm->size - range)) {
2989                         err = -EINVAL;
2990                         goto release_vm_lock;
2991                 }
2992         }
2993
2994         if (args->num_binds) {
2995                 bos = kvcalloc(args->num_binds, sizeof(*bos),
2996                                GFP_KERNEL | __GFP_ACCOUNT);
2997                 if (!bos) {
2998                         err = -ENOMEM;
2999                         goto release_vm_lock;
3000                 }
3001
3002                 ops = kvcalloc(args->num_binds, sizeof(*ops),
3003                                GFP_KERNEL | __GFP_ACCOUNT);
3004                 if (!ops) {
3005                         err = -ENOMEM;
3006                         goto release_vm_lock;
3007                 }
3008         }
3009
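        /*
         * Look up each bind's GEM object and validate its size, its 64K
         * alignment (for 64K-page BOs) and its PAT/coherency constraints.
         */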
3010         for (i = 0; i < args->num_binds; ++i) {
3011                 struct drm_gem_object *gem_obj;
3012                 u64 range = bind_ops[i].range;
3013                 u64 addr = bind_ops[i].addr;
3014                 u32 obj = bind_ops[i].obj;
3015                 u64 obj_offset = bind_ops[i].obj_offset;
3016                 u16 pat_index = bind_ops[i].pat_index;
3017                 u16 coh_mode;
3018
3019                 if (!obj)
3020                         continue;
3021
3022                 gem_obj = drm_gem_object_lookup(file, obj);
3023                 if (XE_IOCTL_DBG(xe, !gem_obj)) {
3024                         err = -ENOENT;
3025                         goto put_obj;
3026                 }
3027                 bos[i] = gem_to_xe_bo(gem_obj);
3028
3029                 if (XE_IOCTL_DBG(xe, range > bos[i]->size) ||
3030                     XE_IOCTL_DBG(xe, obj_offset >
3031                                  bos[i]->size - range)) {
3032                         err = -EINVAL;
3033                         goto put_obj;
3034                 }
3035
3036                 if (bos[i]->flags & XE_BO_INTERNAL_64K) {
3037                         if (XE_IOCTL_DBG(xe, obj_offset &
3038                                          XE_64K_PAGE_MASK) ||
3039                             XE_IOCTL_DBG(xe, addr & XE_64K_PAGE_MASK) ||
3040                             XE_IOCTL_DBG(xe, range & XE_64K_PAGE_MASK)) {
3041                                 err = -EINVAL;
3042                                 goto put_obj;
3043                         }
3044                 }
3045
3046                 coh_mode = xe_pat_index_get_coh_mode(xe, pat_index);
3047                 if (bos[i]->cpu_caching) {
3048                         if (XE_IOCTL_DBG(xe, coh_mode == XE_COH_NONE &&
3049                                          bos[i]->cpu_caching == DRM_XE_GEM_CPU_CACHING_WB)) {
3050                                 err = -EINVAL;
3051                                 goto put_obj;
3052                         }
3053                 } else if (XE_IOCTL_DBG(xe, coh_mode == XE_COH_NONE)) {
3054                         /*
3055                          * An imported dma-buf from a different device should
3056                          * require 1-way or 2-way coherency since we don't know
3057                          * how it was mapped on the CPU. Just assume it is
3058                          * potentially cached on the CPU side.
3059                          */
3060                         err = -EINVAL;
3061                         goto put_obj;
3062                 }
3063         }
3064
3065         if (args->num_syncs) {
3066                 syncs = kcalloc(args->num_syncs, sizeof(*syncs), GFP_KERNEL);
3067                 if (!syncs) {
3068                         err = -ENOMEM;
3069                         goto put_obj;
3070                 }
3071         }
3072
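        /*
         * Parse the user-supplied syncs. User fences are rejected when there
         * are no binds, and at most one user fence is allowed per ioctl.
         */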
3073         syncs_user = u64_to_user_ptr(args->syncs);
3074         for (num_syncs = 0; num_syncs < args->num_syncs; num_syncs++) {
3075                 err = xe_sync_entry_parse(xe, xef, &syncs[num_syncs],
3076                                           &syncs_user[num_syncs],
3077                                           (xe_vm_in_lr_mode(vm) ?
3078                                            SYNC_PARSE_FLAG_LR_MODE : 0) |
3079                                           (!args->num_binds ?
3080                                            SYNC_PARSE_FLAG_DISALLOW_USER_FENCE : 0));
3081                 if (err)
3082                         goto free_syncs;
3083
3084                 if (xe_sync_is_ufence(&syncs[num_syncs]))
3085                         num_ufence++;
3086         }
3087
3088         if (XE_IOCTL_DBG(xe, num_ufence > 1)) {
3089                 err = -EINVAL;
3090                 goto free_syncs;
3091         }
3092
3093         if (!args->num_binds) {
3094                 err = -ENODATA;
3095                 goto free_syncs;
3096         }
3097
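        /*
         * Turn each bind op into GPUVA ops and parse them onto a single
         * ops_list; the final bind is flagged as the last op.
         */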
3098         for (i = 0; i < args->num_binds; ++i) {
3099                 u64 range = bind_ops[i].range;
3100                 u64 addr = bind_ops[i].addr;
3101                 u32 op = bind_ops[i].op;
3102                 u32 flags = bind_ops[i].flags;
3103                 u64 obj_offset = bind_ops[i].obj_offset;
3104                 u32 prefetch_region = bind_ops[i].prefetch_mem_region_instance;
3105                 u16 pat_index = bind_ops[i].pat_index;
3106
3107                 ops[i] = vm_bind_ioctl_ops_create(vm, bos[i], obj_offset,
3108                                                   addr, range, op, flags,
3109                                                   prefetch_region, pat_index);
3110                 if (IS_ERR(ops[i])) {
3111                         err = PTR_ERR(ops[i]);
3112                         ops[i] = NULL;
3113                         goto unwind_ops;
3114                 }
3115
3116                 err = vm_bind_ioctl_ops_parse(vm, q, ops[i], syncs, num_syncs,
3117                                               &ops_list,
3118                                               i == args->num_binds - 1);
3119                 if (err)
3120                         goto unwind_ops;
3121         }
3122
3123         /* Nothing to do */
3124         if (list_empty(&ops_list)) {
3125                 err = -ENODATA;
3126                 goto unwind_ops;
3127         }
3128
3129         xe_vm_get(vm);
3130         if (q)
3131                 xe_exec_queue_get(q);
3132
3133         err = vm_bind_ioctl_ops_execute(vm, &ops_list);
3134
3135         up_write(&vm->lock);
3136
3137         if (q)
3138                 xe_exec_queue_put(q);
3139         xe_vm_put(vm);
3140
3141         for (i = 0; bos && i < args->num_binds; ++i)
3142                 xe_bo_put(bos[i]);
3143
3144         kvfree(bos);
3145         kvfree(ops);
3146         if (args->num_binds > 1)
3147                 kvfree(bind_ops);
3148
3149         return err;
3150
3151 unwind_ops:
3152         vm_bind_ioctl_ops_unwind(vm, ops, args->num_binds);
3153 free_syncs:
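        /*
         * -ENODATA means there was nothing to bind; still signal the
         * out-syncs so userspace waiters are not left hanging.
         */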
3154         if (err == -ENODATA)
3155                 err = vm_bind_ioctl_signal_fences(vm, q, syncs, num_syncs);
3156         while (num_syncs--)
3157                 xe_sync_entry_cleanup(&syncs[num_syncs]);
3158
3159         kfree(syncs);
3160 put_obj:
3161         for (i = 0; i < args->num_binds; ++i)
3162                 xe_bo_put(bos[i]);
3163 release_vm_lock:
3164         up_write(&vm->lock);
3165 put_vm:
3166         xe_vm_put(vm);
3167 put_exec_queue:
3168         if (q)
3169                 xe_exec_queue_put(q);
3170 free_objs:
3171         kvfree(bos);
3172         kvfree(ops);
3173         if (args->num_binds > 1)
3174                 kvfree(bind_ops);
3175         return err;
3176 }
3177
3178 /**
3179  * xe_vm_lock() - Lock the vm's dma_resv object
3180  * @vm: The struct xe_vm whose lock is to be locked
3181  * @intr: Whether to perform any waits interruptibly
3182  *
3183  * Return: 0 on success, -EINTR if @intr is true and the wait for a
3184  * contended lock was interrupted. If @intr is false, the function
3185  * always returns 0.
3186  */
3187 int xe_vm_lock(struct xe_vm *vm, bool intr)
3188 {
3189         if (intr)
3190                 return dma_resv_lock_interruptible(xe_vm_resv(vm), NULL);
3191
3192         return dma_resv_lock(xe_vm_resv(vm), NULL);
3193 }
3194
3195 /**
3196  * xe_vm_unlock() - Unlock the vm's dma_resv object
3197  * @vm: The struct xe_vm whose lock is to be released.
3198  *
3199  * Unlock the vm's dma_resv object that was locked by xe_vm_lock().
3200  */
3201 void xe_vm_unlock(struct xe_vm *vm)
3202 {
3203         dma_resv_unlock(xe_vm_resv(vm));
3204 }
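/*
 * A minimal usage sketch for the pair above (illustrative only, not a call
 * site in this driver), assuming the caller already holds a vm reference:
 *
 *	err = xe_vm_lock(vm, true);
 *	if (err)
 *		return err;
 *	... touch state protected by the vm's dma_resv ...
 *	xe_vm_unlock(vm);
 */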
3205
3206 /**
3207  * xe_vm_invalidate_vma() - invalidate GPU mappings for VMA without a lock
3208  * @vma: VMA to invalidate
3209  *
3210  * Walks the page-table leaves, zeroing the entries owned by this VMA,
3211  * invalidates the TLBs, and blocks until the TLB invalidation is
3212  * complete.
3213  *
3214  * Return: 0 on success, negative error code otherwise.
3215  */
3216 int xe_vm_invalidate_vma(struct xe_vma *vma)
3217 {
3218         struct xe_device *xe = xe_vma_vm(vma)->xe;
3219         struct xe_tile *tile;
3220         u32 tile_needs_invalidate = 0;
3221         int seqno[XE_MAX_TILES_PER_DEVICE];
3222         u8 id;
3223         int ret;
3224
3225         xe_assert(xe, !xe_vma_is_null(vma));
3226         trace_xe_vma_invalidate(vma);
3227
3228         /* Check that we don't race with page-table updates */
3229         if (IS_ENABLED(CONFIG_PROVE_LOCKING)) {
3230                 if (xe_vma_is_userptr(vma)) {
3231                         WARN_ON_ONCE(!mmu_interval_check_retry
3232                                      (&to_userptr_vma(vma)->userptr.notifier,
3233                                       to_userptr_vma(vma)->userptr.notifier_seq));
3234                         WARN_ON_ONCE(!dma_resv_test_signaled(xe_vm_resv(xe_vma_vm(vma)),
3235                                                              DMA_RESV_USAGE_BOOKKEEP));
3236
3237                 } else {
3238                         xe_bo_assert_held(xe_vma_bo(vma));
3239                 }
3240         }
3241
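        /*
         * Zap the PTEs owned by this VMA on every tile and kick off a TLB
         * invalidation for each tile that had live entries, recording the
         * seqno to wait on below.
         */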
3242         for_each_tile(tile, xe, id) {
3243                 if (xe_pt_zap_ptes(tile, vma)) {
3244                         tile_needs_invalidate |= BIT(id);
3245                         xe_device_wmb(xe);
3246                         /*
3247                          * FIXME: We potentially need to invalidate multiple
3248                          * GTs within the tile
3249                          */
3250                         seqno[id] = xe_gt_tlb_invalidation_vma(tile->primary_gt, NULL, vma);
3251                         if (seqno[id] < 0)
3252                                 return seqno[id];
3253                 }
3254         }
3255
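        /* Invalidations were issued above; wait for each outstanding seqno. */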
3256         for_each_tile(tile, xe, id) {
3257                 if (tile_needs_invalidate & BIT(id)) {
3258                         ret = xe_gt_tlb_invalidation_wait(tile->primary_gt, seqno[id]);
3259                         if (ret < 0)
3260                                 return ret;
3261                 }
3262         }
3263
3264         vma->tile_invalidated = vma->tile_mask;
3265
3266         return 0;
3267 }
3268
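/*
 * Print the VM's page-table root and, for every VMA, its range, size, first
 * backing address and placement (NULL / USR / VRAM / SYS).
 */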
3269 int xe_analyze_vm(struct drm_printer *p, struct xe_vm *vm, int gt_id)
3270 {
3271         struct drm_gpuva *gpuva;
3272         bool is_vram;
3273         u64 addr;
3274
3275         if (!down_read_trylock(&vm->lock)) {
3276                 drm_printf(p, " Failed to acquire VM lock to dump capture\n");
3277                 return 0;
3278         }
3279         if (vm->pt_root[gt_id]) {
3280                 addr = xe_bo_addr(vm->pt_root[gt_id]->bo, 0, XE_PAGE_SIZE);
3281                 is_vram = xe_bo_is_vram(vm->pt_root[gt_id]->bo);
3282                 drm_printf(p, " VM root: A:0x%llx %s\n", addr,
3283                            is_vram ? "VRAM" : "SYS");
3284         }
3285
3286         drm_gpuvm_for_each_va(gpuva, &vm->gpuvm) {
3287                 struct xe_vma *vma = gpuva_to_vma(gpuva);
3288                 bool is_userptr = xe_vma_is_userptr(vma);
3289                 bool is_null = xe_vma_is_null(vma);
3290
3291                 if (is_null) {
3292                         addr = 0;
3293                 } else if (is_userptr) {
3294                         struct sg_table *sg = to_userptr_vma(vma)->userptr.sg;
3295                         struct xe_res_cursor cur;
3296
3297                         if (sg) {
3298                                 xe_res_first_sg(sg, 0, XE_PAGE_SIZE, &cur);
3299                                 addr = xe_res_dma(&cur);
3300                         } else {
3301                                 addr = 0;
3302                         }
3303                 } else {
3304                         addr = __xe_bo_addr(xe_vma_bo(vma), 0, XE_PAGE_SIZE);
3305                         is_vram = xe_bo_is_vram(xe_vma_bo(vma));
3306                 }
3307                 drm_printf(p, " [%016llx-%016llx] S:0x%016llx A:%016llx %s\n",
3308                            xe_vma_start(vma), xe_vma_end(vma) - 1,
3309                            xe_vma_size(vma),
3310                            addr, is_null ? "NULL" : is_userptr ? "USR" :
3311                            is_vram ? "VRAM" : "SYS");
3312         }
3313         up_read(&vm->lock);
3314
3315         return 0;
3316 }
3317
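/*
 * One entry per dumpable VMA: its GPU offset and length, plus either a BO
 * reference and offset or, for userptrs, the CPU address and owning mm.
 */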
3318 struct xe_vm_snapshot {
3319         unsigned long num_snaps;
3320         struct {
3321                 u64 ofs, bo_ofs;
3322                 unsigned long len;
3323                 struct xe_bo *bo;
3324                 void *data;
3325                 struct mm_struct *mm;
3326         } snap[];
3327 };
3328
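/*
 * Capture is split in two phases: xe_vm_snapshot_capture() runs under
 * vm->snap_mutex with GFP_NOWAIT and only records the dumpable ranges,
 * taking BO and mm references, while xe_vm_snapshot_capture_delayed()
 * copies the actual contents later, from a context that may sleep.
 */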
3329 struct xe_vm_snapshot *xe_vm_snapshot_capture(struct xe_vm *vm)
3330 {
3331         unsigned long num_snaps = 0, i;
3332         struct xe_vm_snapshot *snap = NULL;
3333         struct drm_gpuva *gpuva;
3334
3335         if (!vm)
3336                 return NULL;
3337
3338         mutex_lock(&vm->snap_mutex);
3339         drm_gpuvm_for_each_va(gpuva, &vm->gpuvm) {
3340                 if (gpuva->flags & XE_VMA_DUMPABLE)
3341                         num_snaps++;
3342         }
3343
3344         if (num_snaps)
3345                 snap = kvzalloc(offsetof(struct xe_vm_snapshot, snap[num_snaps]), GFP_NOWAIT);
3346         if (!snap)
3347                 goto out_unlock;
3348
3349         snap->num_snaps = num_snaps;
3350         i = 0;
3351         drm_gpuvm_for_each_va(gpuva, &vm->gpuvm) {
3352                 struct xe_vma *vma = gpuva_to_vma(gpuva);
3353                 struct xe_bo *bo = vma->gpuva.gem.obj ?
3354                         gem_to_xe_bo(vma->gpuva.gem.obj) : NULL;
3355
3356                 if (!(gpuva->flags & XE_VMA_DUMPABLE))
3357                         continue;
3358
3359                 snap->snap[i].ofs = xe_vma_start(vma);
3360                 snap->snap[i].len = xe_vma_size(vma);
3361                 if (bo) {
3362                         snap->snap[i].bo = xe_bo_get(bo);
3363                         snap->snap[i].bo_ofs = xe_vma_bo_offset(vma);
3364                 } else if (xe_vma_is_userptr(vma)) {
3365                         struct mm_struct *mm =
3366                                 to_userptr_vma(vma)->userptr.notifier.mm;
3367
3368                         if (mmget_not_zero(mm))
3369                                 snap->snap[i].mm = mm;
3370                         else
3371                                 snap->snap[i].data = ERR_PTR(-EFAULT);
3372
3373                         snap->snap[i].bo_ofs = xe_vma_userptr(vma);
3374                 } else {
3375                         snap->snap[i].data = ERR_PTR(-ENOENT);
3376                 }
3377                 i++;
3378         }
3379
3380 out_unlock:
3381         mutex_unlock(&vm->snap_mutex);
3382         return snap;
3383 }
3384
3385 void xe_vm_snapshot_capture_delayed(struct xe_vm_snapshot *snap)
3386 {
3387         for (int i = 0; i < snap->num_snaps; i++) {
3388                 struct xe_bo *bo = snap->snap[i].bo;
3389                 struct iosys_map src;
3390                 int err;
3391
3392                 if (IS_ERR(snap->snap[i].data))
3393                         continue;
3394
3395                 snap->snap[i].data = kvmalloc(snap->snap[i].len, GFP_USER);
3396                 if (!snap->snap[i].data) {
3397                         snap->snap[i].data = ERR_PTR(-ENOMEM);
3398                         goto cleanup_bo;
3399                 }
3400
3401                 if (bo) {
3402                         dma_resv_lock(bo->ttm.base.resv, NULL);
3403                         err = ttm_bo_vmap(&bo->ttm, &src);
3404                         if (!err) {
3405                                 xe_map_memcpy_from(xe_bo_device(bo),
3406                                                    snap->snap[i].data,
3407                                                    &src, snap->snap[i].bo_ofs,
3408                                                    snap->snap[i].len);
3409                                 ttm_bo_vunmap(&bo->ttm, &src);
3410                         }
3411                         dma_resv_unlock(bo->ttm.base.resv);
3412                 } else {
3413                         void __user *userptr = (void __user *)(size_t)snap->snap[i].bo_ofs;
3414
3415                         kthread_use_mm(snap->snap[i].mm);
3416                         if (!copy_from_user(snap->snap[i].data, userptr, snap->snap[i].len))
3417                                 err = 0;
3418                         else
3419                                 err = -EFAULT;
3420                         kthread_unuse_mm(snap->snap[i].mm);
3421
3422                         mmput(snap->snap[i].mm);
3423                         snap->snap[i].mm = NULL;
3424                 }
3425
3426                 if (err) {
3427                         kvfree(snap->snap[i].data);
3428                         snap->snap[i].data = ERR_PTR(err);
3429                 }
3430
3431 cleanup_bo:
3432                 xe_bo_put(bo);
3433                 snap->snap[i].bo = NULL;
3434         }
3435 }
3436
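/*
 * Dump each captured range as an ASCII85-encoded stream of u32 values;
 * ranges that could not be captured are reported with their error code.
 */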
3437 void xe_vm_snapshot_print(struct xe_vm_snapshot *snap, struct drm_printer *p)
3438 {
3439         unsigned long i, j;
3440
3441         for (i = 0; i < snap->num_snaps; i++) {
3442                 if (IS_ERR(snap->snap[i].data))
3443                         goto uncaptured;
3444
3445                 drm_printf(p, "[%llx].length: 0x%lx\n", snap->snap[i].ofs, snap->snap[i].len);
3446                 drm_printf(p, "[%llx].data: ",
3447                            snap->snap[i].ofs);
3448
3449                 for (j = 0; j < snap->snap[i].len; j += sizeof(u32)) {
3450                         u32 *val = snap->snap[i].data + j;
3451                         char dumped[ASCII85_BUFSZ];
3452
3453                         drm_puts(p, ascii85_encode(*val, dumped));
3454                 }
3455
3456                 drm_puts(p, "\n");
3457                 continue;
3458
3459 uncaptured:
3460                 drm_printf(p, "Unable to capture range [%llx-%llx]: %li\n",
3461                            snap->snap[i].ofs, snap->snap[i].ofs + snap->snap[i].len - 1,
3462                            PTR_ERR(snap->snap[i].data));
3463         }
3464 }
3465
3466 void xe_vm_snapshot_free(struct xe_vm_snapshot *snap)
3467 {
3468         unsigned long i;
3469
3470         if (!snap)
3471                 return;
3472
3473         for (i = 0; i < snap->num_snaps; i++) {
3474                 if (!IS_ERR(snap->snap[i].data))
3475                         kvfree(snap->snap[i].data);
3476                 xe_bo_put(snap->snap[i].bo);
3477                 if (snap->snap[i].mm)
3478                         mmput(snap->snap[i].mm);
3479         }
3480         kvfree(snap);
3481 }