drm/amdgpu: update userqueue BOs and PDs
author Arvind Yadav <arvind.yadav@amd.com>
Wed, 25 Sep 2024 16:09:49 +0000 (18:09 +0200)
committer Alex Deucher <alexander.deucher@amd.com>
Tue, 8 Apr 2025 20:48:16 +0000 (16:48 -0400)
This patch updates the VM_IOCTL to allow userspace to synchronize
the mapping/unmapping of a BO in the page table.

The major changes are:
- it adds a drm_timeline object as an input parameter to the VM IOCTL.
- this object is used by the kernel to sync the update of the BO in
  the page table during the mapping of the object.
- the kernel also synchronizes the tlb flush of the page table entry of
  this object during the unmapping (Added in this series:
  https://patchwork.freedesktop.org/series/131276/ and
  https://patchwork.freedesktop.org/patch/584182/)
- the userspace can wait on this timeline, and then the BO is ready to
  be consumed by the GPU.

The UAPI for the same has been approved here:
https://gitlab.freedesktop.org/mesa/drm/-/merge_requests/392

V2:
 - remove the eviction fence coupling

V3:
 - added the drm timeline support instead of input/output fence
   (Christian)

V4:
 - made timeline 64-bit (Christian)
 - bug fix (Arvind)

V5: GLCTS bug fix (Arvind)
V6: Rename syncobj_handle -> timeline_syncobj_out
    Rename point -> timeline_point_in (Marek)
V7: Addressed review comments from Christian:
    - do not send last_update fence in case of vm_clear_freed, instead
      return the fence from gen_va_update_vm
    - move the functions to update bo_mapping  to amdgpu_gem.c
    - do not use amdgpu_userq_update_vm anymore in userq_create()
V8: Addressed review comments from Christian:
    - Split amdgpu_gem_update_bo_mapping function.
    - amdgpu_gem_va_update_vm should return stub for error.
V9: Addressed review comments from Christian:
    - Rename the function amdgpu_gem_update_timeline_node.
    - amdgpu_gem_update_timeline_node should be void function.
    - when timeline_point is zero don't allocate a chain and
      call drm_syncobj_replace_fence() instead of
      drm_syncobj_add_point().
V11: rebase
V12: Fix 32-bit holes issue in struct drm_amdgpu_gem_va.
V13: Fix the review comment by renaming timeline syncobj (Marek)

Cc: Alex Deucher <alexander.deucher@amd.com>
Cc: Felix Kuehling <felix.kuehling@amd.com>
Cc: Christian König <christian.koenig@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Arvind Yadav <arvind.yadav@amd.com>
Signed-off-by: Shashank Sharma <shashank.sharma@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
include/uapi/drm/amdgpu_drm.h

index 69429df094771307d0dd5ffa30f4a4a70ee5b2e8..542a1b70f2ee9768e38bae7295aa9dca65254970 100644 (file)
@@ -36,6 +36,7 @@
 #include <drm/drm_exec.h>
 #include <drm/drm_gem_ttm_helper.h>
 #include <drm/ttm/ttm_tt.h>
+#include <drm/drm_syncobj.h>
 
 #include "amdgpu.h"
 #include "amdgpu_display.h"
 #include "amdgpu_xgmi.h"
 #include "amdgpu_vm.h"
 
+/**
+ * amdgpu_gem_update_timeline_node - look up the timeline syncobj and
+ * pre-allocate its chain node
+ *
+ * @filp: DRM file the syncobj handle is looked up in
+ * @syncobj_handle: userspace handle of the syncobj, 0 if none was given
+ * @point: timeline point; a chain node is only allocated when non-zero
+ * @syncobj: out parameter receiving the syncobj found for @syncobj_handle
+ * @chain: out parameter receiving the newly allocated chain node
+ *
+ * Nothing is written to @syncobj when @syncobj_handle is 0, and nothing is
+ * written to @chain when @syncobj_handle or @point is 0, so the caller has
+ * to initialize both to NULL beforehand.
+ *
+ * When 0 is returned and *@syncobj is non-NULL, the reference obtained from
+ * drm_syncobj_find() is still held and must be dropped with
+ * drm_syncobj_put() once the syncobj is no longer needed. On error no
+ * reference is left behind and *@syncobj is NULL.
+ *
+ * Returns:
+ * 0 on success, -ENOENT if no syncobj matches @syncobj_handle, -ENOMEM if
+ * the chain node could not be allocated.
+ */
+static int
+amdgpu_gem_update_timeline_node(struct drm_file *filp,
+                               uint32_t syncobj_handle,
+                               uint64_t point,
+                               struct drm_syncobj **syncobj,
+                               struct dma_fence_chain **chain)
+{
+       if (!syncobj_handle)
+               return 0;
+
+       /* Find the sync object */
+       *syncobj = drm_syncobj_find(filp, syncobj_handle);
+       if (!*syncobj)
+               return -ENOENT;
+
+       if (!point)
+               return 0;
+
+       /* Allocate the chain node */
+       *chain = dma_fence_chain_alloc();
+       if (!*chain) {
+               drm_syncobj_put(*syncobj);
+               /* Don't hand a pointer to the released syncobj back. */
+               *syncobj = NULL;
+               return -ENOMEM;
+       }
+
+       return 0;
+}
+
+
+/**
+ * amdgpu_gem_update_bo_mapping - install the page table update fence in the
+ * timeline syncobj
+ *
+ * @filp: DRM file whose driver private data holds the VM
+ * @bo_va: bo_va the operation was applied to, may be NULL
+ * @operation: the AMDGPU_VA_OP_* operation that was carried out
+ * @point: timeline point to add; 0 replaces the syncobj's fence instead
+ * @fence: fence used as the last update for UNMAP and CLEAR operations
+ * @syncobj: syncobj to install the fence in, may be NULL
+ * @chain: pre-allocated chain node, only used when @point is non-zero
+ *
+ * For MAP and REPLACE the installed fence is the VM's last_update when the
+ * BO shares the reservation object of the VM root BO, otherwise the
+ * bo_va's last_pt_update. For UNMAP and CLEAR it is @fence.
+ *
+ * This function consumes its resources: on return the caller's references
+ * on @fence and @syncobj have been dropped and @chain has either been
+ * handed over to the syncobj or freed. The caller must not touch any of
+ * them afterwards.
+ */
+static void
+amdgpu_gem_update_bo_mapping(struct drm_file *filp,
+                            struct amdgpu_bo_va *bo_va,
+                            uint32_t operation,
+                            uint64_t point,
+                            struct dma_fence *fence,
+                            struct drm_syncobj *syncobj,
+                            struct dma_fence_chain *chain)
+{
+       struct amdgpu_bo *bo = bo_va ? bo_va->base.bo : NULL;
+       struct amdgpu_fpriv *fpriv = filp->driver_priv;
+       struct amdgpu_vm *vm = &fpriv->vm;
+       struct dma_fence *last_update;
+
+       if (!syncobj)
+               goto out;
+
+       /* Find the last update fence */
+       switch (operation) {
+       case AMDGPU_VA_OP_MAP:
+       case AMDGPU_VA_OP_REPLACE:
+               if (bo && (bo->tbo.base.resv == vm->root.bo->tbo.base.resv))
+                       last_update = vm->last_update;
+               else if (bo_va)
+                       last_update = bo_va->last_pt_update;
+               else
+                       /* No bo_va to take a page table fence from. */
+                       goto out;
+               break;
+       case AMDGPU_VA_OP_UNMAP:
+       case AMDGPU_VA_OP_CLEAR:
+               last_update = fence;
+               break;
+       default:
+               goto out;
+       }
+
+       /* Add fence to timeline */
+       if (!point) {
+               drm_syncobj_replace_fence(syncobj, last_update);
+       } else {
+               drm_syncobj_add_point(syncobj, chain, last_update, point);
+               /* The chain node now belongs to the syncobj. */
+               chain = NULL;
+       }
+
+out:
+       /* Release whatever was not handed over to the syncobj. */
+       dma_fence_chain_free(chain);
+       if (syncobj)
+               drm_syncobj_put(syncobj);
+       dma_fence_put(fence);
+}
+
+
 static vm_fault_t amdgpu_gem_fault(struct vm_fault *vmf)
 {
        struct ttm_buffer_object *bo = vmf->vma->vm_private_data;
@@ -638,18 +708,23 @@ out:
  *
  * Update the bo_va directly after setting its address. Errors are not
  * vital here, so they are not reported back to userspace.
+ *
+ * Returns the resulting fence if freed BO(s) got cleared from the PT,
+ * otherwise a stub fence, e.g. in case of error.
  */
-static void amdgpu_gem_va_update_vm(struct amdgpu_device *adev,
-                                   struct amdgpu_vm *vm,
-                                   struct amdgpu_bo_va *bo_va,
-                                   uint32_t operation)
+static struct dma_fence *
+amdgpu_gem_va_update_vm(struct amdgpu_device *adev,
+                       struct amdgpu_vm *vm,
+                       struct amdgpu_bo_va *bo_va,
+                       uint32_t operation)
 {
+       struct dma_fence *fence = dma_fence_get_stub();
        int r;
 
        if (!amdgpu_vm_ready(vm))
-               return;
+               return fence;
 
-       r = amdgpu_vm_clear_freed(adev, vm, NULL);
+       r = amdgpu_vm_clear_freed(adev, vm, &fence);
        if (r)
                goto error;
 
@@ -665,6 +740,8 @@ static void amdgpu_gem_va_update_vm(struct amdgpu_device *adev,
 error:
        if (r && r != -ERESTARTSYS)
                DRM_ERROR("Couldn't update BO_VA (%d)\n", r);
+
+       return fence;
 }
 
 /**
@@ -713,6 +790,9 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
        struct amdgpu_fpriv *fpriv = filp->driver_priv;
        struct amdgpu_bo *abo;
        struct amdgpu_bo_va *bo_va;
+       struct drm_syncobj *timeline_syncobj = NULL;
+       struct dma_fence_chain *timeline_chain = NULL;
+       struct dma_fence *fence;
        struct drm_exec exec;
        uint64_t va_flags;
        uint64_t vm_size;
@@ -827,9 +907,24 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
        default:
                break;
        }
-       if (!r && !(args->flags & AMDGPU_VM_DELAY_UPDATE) && !adev->debug_vm)
-               amdgpu_gem_va_update_vm(adev, &fpriv->vm, bo_va,
-                                       args->operation);
+       if (!r && !(args->flags & AMDGPU_VM_DELAY_UPDATE) && !adev->debug_vm) {
+
+               r = amdgpu_gem_update_timeline_node(filp,
+                                                   args->vm_timeline_syncobj_out,
+                                                   args->vm_timeline_point,
+                                                   &timeline_syncobj,
+                                                   &timeline_chain);
+
+               fence = amdgpu_gem_va_update_vm(adev, &fpriv->vm, bo_va,
+                                               args->operation);
+
+               if (!r)
+                       amdgpu_gem_update_bo_mapping(filp, bo_va,
+                                                    args->operation,
+                                                    args->vm_timeline_point,
+                                                    fence, timeline_syncobj,
+                                                    timeline_chain);
+       }
 
 error:
        drm_exec_fini(&exec);
index 1a21259cb8c4add32a045dff9505731a8a5297b2..ca82935ff93aa9c82235e2d075c6084a20de4441 100644 (file)
@@ -857,6 +857,15 @@ struct drm_amdgpu_gem_va {
        __u64 offset_in_bo;
        /** Specify mapping size. Must be correctly aligned. */
        __u64 map_size;
+       /**
+        * vm_timeline_point is a sequence number used to add new timeline point.
+        */
+       __u64 vm_timeline_point;
+       /**
+        * The vm page table update fence is installed in given vm_timeline_syncobj_out
+        * at vm_timeline_point.
+        */
+       __u32 vm_timeline_syncobj_out;
 };
 
 #define AMDGPU_HW_IP_GFX          0