drm/i915: Add support for moving fence waiting
author Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Mon, 22 Nov 2021 21:45:49 +0000 (22:45 +0100)
committer Thomas Hellström <thomas.hellstrom@linux.intel.com>
Thu, 25 Nov 2021 08:36:13 +0000 (09:36 +0100)
For now, we will only allow async migration when TTM is used,
so the paths we care about are related to TTM.

The mmap path is handled by having the fence in ttm_bo->moving.
When pinning, the binding only becomes available after the moving
fence has signaled, and pinning a CPU map will only work after
the moving fence signals.

This should close all holes where userspace can read a buffer
before it's fully migrated.

v2:
- Fix a couple of SPARSE warnings
v3:
- Fix a NULL pointer dereference
v4:
- Ditch the moving fence waiting for i915_vma_pin_iomap() and
  replace with a verification that the vma is already bound.
  (Matthew Auld)
- Squash with a previous patch introducing moving fence waiting and
  accessing interfaces (Matthew Auld)
- Rename to indicate that we also add support for sync waiting.
v5:
- Fix check for NULL and unreferencing in i915_vma_verify_bind_complete()
  (Matthew Auld)
- Fix compilation failure if !CONFIG_DRM_I915_DEBUG_GEM
- Fix include ordering. (Matthew Auld)
v7:
- Fix yet another compilation failure with clang if
  !CONFIG_DRM_I915_DEBUG_GEM

Co-developed-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Signed-off-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Reviewed-by: Matthew Auld <matthew.auld@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20211122214554.371864-2-thomas.hellstrom@linux.intel.com
drivers/gpu/drm/i915/gem/i915_gem_object.c
drivers/gpu/drm/i915/gem/i915_gem_object.h
drivers/gpu/drm/i915/gem/i915_gem_pages.c
drivers/gpu/drm/i915/i915_vma.c

index 575d7a194b8656695713451e62ebccf033c07647..5fac9b560b733a3e2ccaef3d457a1232385d2b7e 100644 (file)
@@ -31,6 +31,7 @@
 #include "i915_gem_context.h"
 #include "i915_gem_mman.h"
 #include "i915_gem_object.h"
+#include "i915_gem_ttm.h"
 #include "i915_memcpy.h"
 #include "i915_trace.h"
 
@@ -727,6 +728,57 @@ static const struct drm_gem_object_funcs i915_gem_object_funcs = {
        .export = i915_gem_prime_export,
 };
 
+/**
+ * i915_gem_object_get_moving_fence - Get the object's moving fence if any
+ * @obj: The object whose moving fence to get.
+ *
+ * A non-signaled moving fence means that an async operation is still
+ * pending on the object; it must be waited on before any GPU or CPU
+ * PTEs pointing at the object's pages are set up.
+ *
+ * Return: A new reference to the object's moving fence (release it with
+ * dma_fence_put()), or NULL if the object has no moving fence.
+ */
+struct dma_fence *
+i915_gem_object_get_moving_fence(struct drm_i915_gem_object *obj)
+{
+       return dma_fence_get(i915_gem_to_ttm(obj)->moving);
+}
+
+/**
+ * i915_gem_object_wait_moving_fence - Wait for the object's moving fence if any
+ * @obj: The object whose moving fence to wait for.
+ * @intr: Whether the wait is interruptible.
+ *
+ * The object lock must be held. After a successful, error-free wait the
+ * fence is detached from the object and put; otherwise it stays attached.
+ *
+ * Return: 0 if successful, -ERESTARTSYS if the wait was interrupted,
+ * negative error code if the async operation represented by the
+ * moving fence failed.
+ */
+int i915_gem_object_wait_moving_fence(struct drm_i915_gem_object *obj,
+                                     bool intr)
+{
+       struct dma_fence *fence = i915_gem_to_ttm(obj)->moving;
+       int ret;
+
+       assert_object_held(obj);
+       if (!fence)
+               return 0;
+
+       ret = dma_fence_wait(fence, intr);
+       if (ret)
+               return ret;
+
+       if (fence->error)
+               return fence->error;
+
+       i915_gem_to_ttm(obj)->moving = NULL;
+       dma_fence_put(fence);
+       return 0;
+}
+
 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
 #include "selftests/huge_gem_object.c"
 #include "selftests/huge_pages.c"
index 133963b461350fede10f931597659078becb4497..66f20b803b01d5440580199fe7cec1ad9eaa2aa8 100644 (file)
@@ -517,6 +517,12 @@ i915_gem_object_finish_access(struct drm_i915_gem_object *obj)
        i915_gem_object_unpin_pages(obj);
 }
 
+struct dma_fence *
+i915_gem_object_get_moving_fence(struct drm_i915_gem_object *obj);
+
+int i915_gem_object_wait_moving_fence(struct drm_i915_gem_object *obj,
+                                     bool intr);
+
 void i915_gem_object_set_cache_coherency(struct drm_i915_gem_object *obj,
                                         unsigned int cache_level);
 bool i915_gem_object_can_bypass_llc(struct drm_i915_gem_object *obj);
index c4f684b7cc51aacd0a772c41449f6e4a5623671d..49c6e55c68ce0f2801d0ee57b32a572ec9eebfef 100644 (file)
@@ -418,6 +418,12 @@ void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj,
        }
 
        if (!ptr) {
+               err = i915_gem_object_wait_moving_fence(obj, true);
+               if (err) {
+                       ptr = ERR_PTR(err);
+                       goto err_unpin;
+               }
+
                if (GEM_WARN_ON(type == I915_MAP_WC &&
                                !static_cpu_has(X86_FEATURE_PAT)))
                        ptr = ERR_PTR(-ENODEV);
index e2f2c4c52009545f92a6cc64d92563c285d5dbed..927f0d4f8e118f886062a8992fc08a187e925633 100644 (file)
@@ -353,6 +353,32 @@ int i915_vma_wait_for_bind(struct i915_vma *vma)
        return err;
 }
 
+#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)
+static int i915_vma_verify_bind_complete(struct i915_vma *vma)
+{
+       int err = 0;
+
+       if (i915_active_has_exclusive(&vma->active)) {
+               struct dma_fence *fence =
+                       i915_active_fence_get(&vma->active.excl);
+
+               if (!fence)
+                       return 0; /* no exclusive fence to inspect */
+
+               if (dma_fence_is_signaled(fence))
+                       err = fence->error; /* error of the signaled fence */
+               else
+                       err = -EBUSY; /* fence has not signaled yet */
+
+               dma_fence_put(fence); /* pairs with the get above */
+       }
+
+       return err;
+}
+#else /* !CONFIG_DRM_I915_DEBUG_GEM: the check always evaluates to 0 */
+#define i915_vma_verify_bind_complete(_vma) 0
+#endif /* CONFIG_DRM_I915_DEBUG_GEM */
+
 /**
  * i915_vma_bind - Sets up PTEs for an VMA in it's corresponding address space.
  * @vma: VMA to map
@@ -425,6 +451,13 @@ int i915_vma_bind(struct i915_vma *vma,
                __i915_gem_object_pin_pages(vma->obj);
                work->pinned = i915_gem_object_get(vma->obj);
        } else {
+               if (vma->obj) {
+                       int ret;
+
+                       ret = i915_gem_object_wait_moving_fence(vma->obj, true);
+                       if (ret)
+                               return ret;
+               }
                vma->ops->bind_vma(vma->vm, NULL, vma, cache_level, bind_flags);
        }
 
@@ -446,6 +479,7 @@ void __iomem *i915_vma_pin_iomap(struct i915_vma *vma)
 
        GEM_BUG_ON(!i915_vma_is_ggtt(vma));
        GEM_BUG_ON(!i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND));
+       GEM_BUG_ON(i915_vma_verify_bind_complete(vma));
 
        ptr = READ_ONCE(vma->iomap);
        if (ptr == NULL) {
@@ -861,6 +895,7 @@ int i915_vma_pin_ww(struct i915_vma *vma, struct i915_gem_ww_ctx *ww,
                    u64 size, u64 alignment, u64 flags)
 {
        struct i915_vma_work *work = NULL;
+       struct dma_fence *moving = NULL;
        intel_wakeref_t wakeref = 0;
        unsigned int bound;
        int err;
@@ -886,7 +921,8 @@ int i915_vma_pin_ww(struct i915_vma *vma, struct i915_gem_ww_ctx *ww,
        if (flags & PIN_GLOBAL)
                wakeref = intel_runtime_pm_get(&vma->vm->i915->runtime_pm);
 
-       if (flags & vma->vm->bind_async_flags) {
+       moving = vma->obj ? i915_gem_object_get_moving_fence(vma->obj) : NULL;
+       if (flags & vma->vm->bind_async_flags || moving) {
                /* lock VM */
                err = i915_vm_lock_objects(vma->vm, ww);
                if (err)
@@ -900,6 +936,8 @@ int i915_vma_pin_ww(struct i915_vma *vma, struct i915_gem_ww_ctx *ww,
 
                work->vm = i915_vm_get(vma->vm);
 
+               dma_fence_work_chain(&work->base, moving);
+
                /* Allocate enough page directories to used PTE */
                if (vma->vm->allocate_va_range) {
                        err = i915_vm_alloc_pt_stash(vma->vm,
@@ -1004,7 +1042,10 @@ err_fence:
 err_rpm:
        if (wakeref)
                intel_runtime_pm_put(&vma->vm->i915->runtime_pm, wakeref);
+       if (moving)
+               dma_fence_put(moving);
        vma_put_pages(vma);
+
        return err;
 }