drm/i915/gt: Poison residual state [HWSP] across resume.
author Chris Wilson <chris@chris-wilson.co.uk>
Tue, 21 Apr 2020 09:25:04 +0000 (10:25 +0100)
committer Chris Wilson <chris@chris-wilson.co.uk>
Tue, 21 Apr 2020 15:27:39 +0000 (16:27 +0100)
Since we may lose the content of any buffer when we relinquish control
of the system (e.g. suspend/resume), we have to be careful not to rely
on regaining control. A good method to detect when we might be using
garbage is by always injecting that garbage prior to first use on
load/resume/etc.

v2: Drop sanitize callback on cleanup
v3: Move seqno reset to timeline enter, so we reset all timelines.
However, this is done on every activation during runtime and not reset.
This is a similar level of paranoia to the one we apply when correcting
context state after a period of inactivity.

Suggested-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Venkata Ramana Nayana <venkata.ramana.nayana@intel.com>
Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20200421092504.7416-1-chris@chris-wilson.co.uk
drivers/gpu/drm/i915/gt/intel_lrc.c
drivers/gpu/drm/i915/gt/intel_timeline.c
drivers/gpu/drm/i915/gt/intel_timeline.h

index 34f67eb9bfa1f576e7f01757d3d35a4065ebff14..d42a9d6767d483b77bd23937a4235ebeba2e9a40 100644 (file)
@@ -3649,7 +3649,26 @@ static void reset_csb_pointers(struct intel_engine_cs *engine)
 
 static void execlists_sanitize(struct intel_engine_cs *engine)
 {
+       /*
+        * Poison residual state on resume, in case the suspend didn't!
+        *
+        * We have to assume that across suspend/resume (or other loss
+        * of control) that the contents of our pinned buffers has been
+        * lost, replaced by garbage. Since this doesn't always happen,
+        * let's poison such state so that we more quickly spot when
+        * we falsely assume it has been preserved.
+        */
+       if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
+               memset(engine->status_page.addr, POISON_INUSE, PAGE_SIZE);
+
        reset_csb_pointers(engine);
+
+       /*
+        * The kernel_context HWSP is stored in the status_page. As above,
+        * that may be lost on resume/initialisation, and so we need to
+        * reset the value in the HWSP.
+        */
+       intel_timeline_reset_seqno(engine->kernel_context->timeline);
 }
 
 static void enable_error_interrupt(struct intel_engine_cs *engine)
@@ -4539,6 +4558,8 @@ static void execlists_shutdown(struct intel_engine_cs *engine)
 
 static void execlists_release(struct intel_engine_cs *engine)
 {
+       engine->sanitize = NULL; /* no longer in control, nothing to sanitize */
+
        execlists_shutdown(engine);
 
        intel_engine_cleanup_common(engine);
@@ -4550,7 +4571,6 @@ logical_ring_default_vfuncs(struct intel_engine_cs *engine)
 {
        /* Default vfuncs which can be overriden by each engine. */
 
-       engine->sanitize = execlists_sanitize;
        engine->resume = execlists_resume;
 
        engine->cops = &execlists_context_ops;
@@ -4666,6 +4686,7 @@ int intel_execlists_submission_setup(struct intel_engine_cs *engine)
                execlists->csb_size = GEN11_CSB_ENTRIES;
 
        /* Finally, take ownership and responsibility for cleanup! */
+       engine->sanitize = execlists_sanitize;
        engine->release = execlists_release;
 
        return 0;
index 3779c2ae0d65b7f07071d6327e53b569f8b0c7f6..29a39e44fa3642edb7051268ea022cef3ac1e420 100644 (file)
@@ -337,6 +337,13 @@ int intel_timeline_pin(struct intel_timeline *tl)
        return 0;
 }
 
+void intel_timeline_reset_seqno(const struct intel_timeline *tl)
+{
+       /* Must be pinned to be writable, and no requests in flight. */
+       GEM_BUG_ON(!atomic_read(&tl->pin_count));
+       WRITE_ONCE(*(u32 *)tl->hwsp_seqno, tl->seqno);
+}
+
 void intel_timeline_enter(struct intel_timeline *tl)
 {
        struct intel_gt_timelines *timelines = &tl->gt->timelines;
@@ -365,8 +372,16 @@ void intel_timeline_enter(struct intel_timeline *tl)
                return;
 
        spin_lock(&timelines->lock);
-       if (!atomic_fetch_inc(&tl->active_count))
+       if (!atomic_fetch_inc(&tl->active_count)) {
+               /*
+                * The HWSP is volatile, and may have been lost while inactive,
+                * e.g. across suspend/resume. Be paranoid, and ensure that
+                * the HWSP value matches our seqno so we don't proclaim
+                * the next request as already complete.
+                */
+               intel_timeline_reset_seqno(tl);
                list_add_tail(&tl->link, &timelines->active_list);
+       }
        spin_unlock(&timelines->lock);
 }
 
index f5b7eade3809b41b220d9bfea5ac30cd556c38a9..c8e59a333182a199c386dcfe62a8080e67a68e1a 100644 (file)
@@ -84,6 +84,8 @@ int intel_timeline_get_seqno(struct intel_timeline *tl,
 void intel_timeline_exit(struct intel_timeline *tl);
 void intel_timeline_unpin(struct intel_timeline *tl);
 
+void intel_timeline_reset_seqno(const struct intel_timeline *tl);
+
 int intel_timeline_read_hwsp(struct i915_request *from,
                             struct i915_request *until,
                             u32 *hwsp_offset);