drm/xe: Lock all gpuva ops during VM bind IOCTL
authorMatthew Brost <matthew.brost@intel.com>
Thu, 25 Apr 2024 04:55:01 +0000 (21:55 -0700)
committerMatthew Brost <matthew.brost@intel.com>
Fri, 26 Apr 2024 19:09:55 +0000 (12:09 -0700)
Lock all BOs used in gpuva ops and validate all BOs in a single step
during the VM bind IOCTL.

This help with the transition to making all gpuva ops in a VM bind IOCTL
a single atomic job which is required for proper error handling.

v2:
 - Better commit message (Oak)
 - s/op_lock/op_lock_and_prep, few other renames too (Oak)
 - Use DRM_EXEC_IGNORE_DUPLICATES flag in drm_exec_init (local testing)
 - Do not reserve slots in locking step (direction based on series from Thomas)
v3:
 - Validate BO if is immediate set (Oak)

Cc: Oak Zeng <oak.zeng@intel.com>
Signed-off-by: Matthew Brost <matthew.brost@intel.com>
Reviewed-by: Oak Zeng <oak.zeng@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20240425045513.1913039-2-matthew.brost@intel.com
drivers/gpu/drm/xe/xe_vm.c

index 89c73d109f6a398c8e226e53c935480ba915037f..cc9d2ebc7e66562837f4d7268f3af44ceac6bae7 100644 (file)
@@ -315,19 +315,23 @@ int __xe_vm_userptr_needs_repin(struct xe_vm *vm)
 
 #define XE_VM_REBIND_RETRY_TIMEOUT_MS 1000
 
-static void xe_vm_kill(struct xe_vm *vm)
+static void xe_vm_kill(struct xe_vm *vm, bool unlocked)
 {
        struct xe_exec_queue *q;
 
        lockdep_assert_held(&vm->lock);
 
-       xe_vm_lock(vm, false);
+       if (unlocked)
+               xe_vm_lock(vm, false);
+
        vm->flags |= XE_VM_FLAG_BANNED;
        trace_xe_vm_kill(vm);
 
        list_for_each_entry(q, &vm->preempt.exec_queues, compute.link)
                q->ops->kill(q);
-       xe_vm_unlock(vm);
+
+       if (unlocked)
+               xe_vm_unlock(vm);
 
        /* TODO: Inform user the VM is banned */
 }
@@ -557,7 +561,7 @@ out_unlock_outer:
 
        if (err) {
                drm_warn(&vm->xe->drm, "VM worker error: %d\n", err);
-               xe_vm_kill(vm);
+               xe_vm_kill(vm, true);
        }
        up_write(&vm->lock);
 
@@ -1774,17 +1778,9 @@ static int xe_vm_bind(struct xe_vm *vm, struct xe_vma *vma, struct xe_exec_queue
                      u32 num_syncs, bool immediate, bool first_op,
                      bool last_op)
 {
-       int err;
-
        xe_vm_assert_held(vm);
        xe_bo_assert_held(bo);
 
-       if (bo && immediate) {
-               err = xe_bo_validate(bo, vm, true);
-               if (err)
-                       return err;
-       }
-
        return __xe_vm_bind(vm, vma, q, syncs, num_syncs, immediate, first_op,
                            last_op);
 }
@@ -2437,17 +2433,13 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct xe_exec_queue *q,
        return 0;
 }
 
-static int op_execute(struct drm_exec *exec, struct xe_vm *vm,
-                     struct xe_vma *vma, struct xe_vma_op *op)
+static int op_execute(struct xe_vm *vm, struct xe_vma *vma,
+                     struct xe_vma_op *op)
 {
        int err;
 
        lockdep_assert_held_write(&vm->lock);
 
-       err = xe_vm_lock_vma(exec, vma);
-       if (err)
-               return err;
-
        xe_vm_assert_held(vm);
        xe_bo_assert_held(xe_vma_bo(vma));
 
@@ -2528,19 +2520,10 @@ static int op_execute(struct drm_exec *exec, struct xe_vm *vm,
 static int __xe_vma_op_execute(struct xe_vm *vm, struct xe_vma *vma,
                               struct xe_vma_op *op)
 {
-       struct drm_exec exec;
        int err;
 
 retry_userptr:
-       drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0);
-       drm_exec_until_all_locked(&exec) {
-               err = op_execute(&exec, vm, vma, op);
-               drm_exec_retry_on_contention(&exec);
-               if (err)
-                       break;
-       }
-       drm_exec_fini(&exec);
-
+       err = op_execute(vm, vma, op);
        if (err == -EAGAIN) {
                lockdep_assert_held_write(&vm->lock);
 
@@ -2705,29 +2688,114 @@ static void vm_bind_ioctl_ops_unwind(struct xe_vm *vm,
        }
 }
 
+static int vma_lock_and_validate(struct drm_exec *exec, struct xe_vma *vma,
+                                bool validate)
+{
+       struct xe_bo *bo = xe_vma_bo(vma);
+       int err = 0;
+
+       if (bo) {
+               if (!bo->vm)
+                       err = drm_exec_prepare_obj(exec, &bo->ttm.base, 0);
+               if (!err && validate)
+                       err = xe_bo_validate(bo, xe_vma_vm(vma), true);
+       }
+
+       return err;
+}
+
+static int op_lock_and_prep(struct drm_exec *exec, struct xe_vm *vm,
+                           struct xe_vma_op *op)
+{
+       int err = 0;
+
+       switch (op->base.op) {
+       case DRM_GPUVA_OP_MAP:
+               err = vma_lock_and_validate(exec, op->map.vma,
+                                           !xe_vm_in_fault_mode(vm) ||
+                                           op->map.immediate);
+               break;
+       case DRM_GPUVA_OP_REMAP:
+               err = vma_lock_and_validate(exec,
+                                           gpuva_to_vma(op->base.remap.unmap->va),
+                                           false);
+               if (!err && op->remap.prev)
+                       err = vma_lock_and_validate(exec, op->remap.prev, true);
+               if (!err && op->remap.next)
+                       err = vma_lock_and_validate(exec, op->remap.next, true);
+               break;
+       case DRM_GPUVA_OP_UNMAP:
+               err = vma_lock_and_validate(exec,
+                                           gpuva_to_vma(op->base.unmap.va),
+                                           false);
+               break;
+       case DRM_GPUVA_OP_PREFETCH:
+               err = vma_lock_and_validate(exec,
+                                           gpuva_to_vma(op->base.prefetch.va), true);
+               break;
+       default:
+               drm_warn(&vm->xe->drm, "NOT POSSIBLE");
+       }
+
+       return err;
+}
+
+static int vm_bind_ioctl_ops_lock_and_prep(struct drm_exec *exec,
+                                          struct xe_vm *vm,
+                                          struct list_head *ops_list)
+{
+       struct xe_vma_op *op;
+       int err;
+
+       err = drm_exec_prepare_obj(exec, xe_vm_obj(vm), 0);
+       if (err)
+               return err;
+
+       list_for_each_entry(op, ops_list, link) {
+               err = op_lock_and_prep(exec, vm, op);
+               if (err)
+                       return err;
+       }
+
+       return 0;
+}
+
 static int vm_bind_ioctl_ops_execute(struct xe_vm *vm,
                                     struct list_head *ops_list)
 {
+       struct drm_exec exec;
        struct xe_vma_op *op, *next;
        int err;
 
        lockdep_assert_held_write(&vm->lock);
 
-       list_for_each_entry_safe(op, next, ops_list, link) {
-               err = xe_vma_op_execute(vm, op);
-               if (err) {
-                       drm_warn(&vm->xe->drm, "VM op(%d) failed with %d",
-                                op->base.op, err);
-                       /*
-                        * FIXME: Killing VM rather than proper error handling
-                        */
-                       xe_vm_kill(vm);
-                       return -ENOSPC;
+       drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT |
+                     DRM_EXEC_IGNORE_DUPLICATES, 0);
+       drm_exec_until_all_locked(&exec) {
+               err = vm_bind_ioctl_ops_lock_and_prep(&exec, vm, ops_list);
+               drm_exec_retry_on_contention(&exec);
+               if (err)
+                       goto unlock;
+
+               list_for_each_entry_safe(op, next, ops_list, link) {
+                       err = xe_vma_op_execute(vm, op);
+                       if (err) {
+                               drm_warn(&vm->xe->drm, "VM op(%d) failed with %d",
+                                        op->base.op, err);
+                               /*
+                                * FIXME: Killing VM rather than proper error handling
+                                */
+                               xe_vm_kill(vm, false);
+                               err = -ENOSPC;
+                               goto unlock;
+                       }
+                       xe_vma_op_cleanup(vm, op);
                }
-               xe_vma_op_cleanup(vm, op);
        }
 
-       return 0;
+unlock:
+       drm_exec_fini(&exec);
+       return err;
 }
 
 #define SUPPORTED_FLAGS        \