#include "intel_reset.h"
#include "uc/intel_guc.h"
+#include "uc/intel_guc_submission.h"
#define RESET_MAX_RETRIES 3
static void engine_skip_context(struct i915_request *rq)
{
struct intel_engine_cs *engine = rq->engine;
- struct i915_gem_context *hung_ctx = rq->gem_context;
+ struct intel_context *hung_ctx = rq->context;
if (!i915_request_is_active(rq))
return;
lockdep_assert_held(&engine->active.lock);
list_for_each_entry_continue(rq, &engine->active.requests, sched.link)
- if (rq->gem_context == hung_ctx)
+ if (rq->context == hung_ctx)
i915_request_skip(rq, -EIO);
}
-static void client_mark_guilty(struct drm_i915_file_private *file_priv,
- const struct i915_gem_context *ctx)
+static void client_mark_guilty(struct i915_gem_context *ctx, bool banned)
{
- unsigned int score;
+ struct drm_i915_file_private *file_priv = ctx->file_priv;
unsigned long prev_hang;
+ unsigned int score;
- if (i915_gem_context_is_banned(ctx))
+ if (IS_ERR_OR_NULL(file_priv))
+ return;
+
+ score = 0;
+ if (banned)
score = I915_CLIENT_SCORE_CONTEXT_BAN;
- else
- score = 0;
prev_hang = xchg(&file_priv->hang_timestamp, jiffies);
if (time_before(jiffies, prev_hang + I915_CLIENT_FAST_HANG_JIFFIES))
}
}
-static bool context_mark_guilty(struct i915_gem_context *ctx)
+static bool mark_guilty(struct i915_request *rq)
{
+ struct i915_gem_context *ctx;
unsigned long prev_hang;
bool banned;
int i;
+ ctx = rq->context->gem_context;
+ if (!ctx)
+ return false;
+
+ if (i915_gem_context_is_closed(ctx)) {
+ intel_context_set_banned(rq->context);
+ return true;
+ }
+
atomic_inc(&ctx->guilty_count);
/* Cool contexts are too cool to be banned! (Used for reset testing.) */
if (!i915_gem_context_is_bannable(ctx))
return false;
+ dev_notice(ctx->i915->drm.dev,
+ "%s context reset due to GPU hang\n",
+ ctx->name);
+
/* Record the timestamp for the last N hangs */
prev_hang = ctx->hang_timestamp[0];
for (i = 0; i < ARRAY_SIZE(ctx->hang_timestamp) - 1; i++)
if (banned) {
DRM_DEBUG_DRIVER("context %s: guilty %d, banned\n",
ctx->name, atomic_read(&ctx->guilty_count));
- i915_gem_context_set_banned(ctx);
+ intel_context_set_banned(rq->context);
}
- if (!IS_ERR_OR_NULL(ctx->file_priv))
- client_mark_guilty(ctx->file_priv, ctx);
+ client_mark_guilty(ctx, banned);
return banned;
}
-static void context_mark_innocent(struct i915_gem_context *ctx)
+static void mark_innocent(struct i915_request *rq)
{
- atomic_inc(&ctx->active_count);
+ if (rq->context->gem_context)
+ atomic_inc(&rq->context->gem_context->active_count);
}
void __i915_request_reset(struct i915_request *rq, bool guilty)
GEM_BUG_ON(i915_request_completed(rq));
+ rcu_read_lock(); /* protect the GEM context */
if (guilty) {
i915_request_skip(rq, -EIO);
- if (context_mark_guilty(rq->gem_context))
+ if (mark_guilty(rq))
engine_skip_context(rq);
} else {
dma_fence_set_error(&rq->fence, -EAGAIN);
- context_mark_innocent(rq->gem_context);
+ mark_innocent(rq);
}
+ rcu_read_unlock();
}
static bool i915_in_reset(struct pci_dev *pdev)
continue;
GEM_BUG_ON(vma->fence != >->ggtt->fence_regs[i]);
- node = &vma->obj->base.vma_node;
+
+ if (!vma->mmo)
+ continue;
+
+ node = &vma->mmo->vma_node;
vma_offset = vma->ggtt_view.partial.offset << PAGE_SHIFT;
+
unmap_mapping_range(gt->i915->drm.anon_inode->i_mapping,
drm_vma_node_offset_addr(node) + vma_offset,
vma->size,
engine->reset.finish(engine);
intel_uncore_forcewake_put(engine->uncore, FORCEWAKE_ALL);
- intel_engine_breadcrumbs_irq(engine);
+ intel_engine_signal_breadcrumbs(engine);
}
static void reset_finish(struct intel_gt *gt, intel_engine_mask_t awake)
i915_request_mark_complete(request);
spin_unlock_irqrestore(&engine->active.lock, flags);
- intel_engine_queue_breadcrumbs(engine);
+ intel_engine_signal_breadcrumbs(engine);
}
static void __intel_gt_set_wedged(struct intel_gt *gt)
{
struct intel_gt_timelines *timelines = >->timelines;
struct intel_timeline *tl;
- unsigned long flags;
bool ok;
if (!test_bit(I915_WEDGED, >->reset.flags))
*
* No more can be submitted until we reset the wedged bit.
*/
- spin_lock_irqsave(&timelines->lock, flags);
+ spin_lock(&timelines->lock);
list_for_each_entry(tl, &timelines->active_list, link) {
struct dma_fence *fence;
if (!fence)
continue;
- spin_unlock_irqrestore(&timelines->lock, flags);
+ spin_unlock(&timelines->lock);
/*
* All internal dependencies (i915_requests) will have
dma_fence_put(fence);
/* Restart iteration after droping lock */
- spin_lock_irqsave(&timelines->lock, flags);
+ spin_lock(&timelines->lock);
tl = list_entry(&timelines->active_list, typeof(*tl), link);
}
- spin_unlock_irqrestore(&timelines->lock, flags);
+ spin_unlock(&timelines->lock);
/* We must reset pending GPU events before restoring our submission */
ok = !HAS_EXECLISTS(gt->i915); /* XXX better agnosticism desired */
int intel_engine_reset(struct intel_engine_cs *engine, const char *msg)
{
struct intel_gt *gt = engine->gt;
+ bool uses_guc = intel_engine_in_guc_submission_mode(engine);
int ret;
- GEM_TRACE("%s flags=%lx\n", engine->name, gt->reset.flags);
+ ENGINE_TRACE(engine, "flags=%lx\n", gt->reset.flags);
GEM_BUG_ON(!test_bit(I915_RESET_ENGINE + engine->id, >->reset.flags));
if (!intel_engine_pm_get_if_awake(engine))
"Resetting %s for %s\n", engine->name, msg);
atomic_inc(&engine->i915->gpu_error.reset_engine_count[engine->uabi_class]);
- if (!engine->gt->uc.guc.execbuf_client)
+ if (!uses_guc)
ret = intel_gt_reset_engine(engine);
else
ret = intel_guc_reset_engine(&engine->gt->uc.guc, engine);
if (ret) {
/* If we fail here, we expect to fallback to a global reset */
DRM_DEBUG_DRIVER("%sFailed to reset %s, ret=%d\n",
- engine->gt->uc.guc.execbuf_client ? "GuC " : "",
+ uses_guc ? "GuC " : "",
engine->name, ret);
goto out;
}
out:
intel_engine_cancel_stop_cs(engine);
reset_finish_engine(engine);
- intel_engine_pm_put(engine);
+ intel_engine_pm_put_async(engine);
return ret;
}