drm/i915: Skip error capture when wedged on init
author: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Thu, 11 Nov 2021 13:06:34 +0000 (13:06 +0000)
committer: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Tue, 16 Nov 2021 10:36:08 +0000 (10:36 +0000)
Trying to capture uninitialised engines after we have wedged on init ends in
tears. Skip that, together with uC capture, since failure to initialise the
latter can actually be one of the reasons for wedging on init.

v2:
 * Use i915_disable_error_state when wedging on init/fini.

v3:
 * Handle mock tests.

Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Reviewed-by: Matthew Auld <matthew.auld@intel.com> # v1
Link: https://patchwork.freedesktop.org/patch/msgid/20211111130634.266098-1-tvrtko.ursulin@linux.intel.com
drivers/gpu/drm/i915/gt/intel_reset.c
drivers/gpu/drm/i915/selftests/mock_gem_device.c

index 51b56b8e50030a3afa6245cefa9f3cf59750606c..0fbd6dbadce75da2144d34db14dd737a317bfefb 100644 (file)
@@ -1448,6 +1448,7 @@ void intel_gt_set_wedged_on_init(struct intel_gt *gt)
        BUILD_BUG_ON(I915_RESET_ENGINE + I915_NUM_ENGINES >
                     I915_WEDGED_ON_INIT);
        intel_gt_set_wedged(gt);
+       i915_disable_error_state(gt->i915, -ENODEV);
        set_bit(I915_WEDGED_ON_INIT, &gt->reset.flags);
 
        /* Wedged on init is non-recoverable */
@@ -1457,6 +1458,7 @@ void intel_gt_set_wedged_on_init(struct intel_gt *gt)
 void intel_gt_set_wedged_on_fini(struct intel_gt *gt)
 {
        intel_gt_set_wedged(gt);
+       i915_disable_error_state(gt->i915, -ENODEV);
        set_bit(I915_WEDGED_ON_FINI, &gt->reset.flags);
        intel_gt_retire_requests(gt); /* cleanup any wedged requests */
 }
index 9ab3f284d1dd9ede421b36b249d53274812f799a..d0e2e61de8d41a3f660edb78931e68a117424c18 100644 (file)
@@ -177,6 +177,8 @@ struct drm_i915_private *mock_gem_device(void)
 
        mock_uncore_init(&i915->uncore, i915);
 
+       spin_lock_init(&i915->gpu_error.lock);
+
        i915_gem_init__mm(i915);
        intel_gt_init_early(&i915->gt, i915);
        atomic_inc(&i915->gt.wakeref.count); /* disable; no hw support */