From 3cc44feb9861d2f5267af9b962ae92c5ea1b48fd Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris@chris-wilson.co.uk>
Date: Tue, 26 Nov 2019 06:55:21 +0000
Subject: [PATCH] drm/i915: Reduce nested prepare_remote_context() to a trylock

On context retiring, we may invoke the kernel_context to unpin this
context. Elsewhere, we may use the kernel_context to modify this
context. This currently leads to an AB-BA lock inversion, so we need
to back-off from the contended lock, and repeat.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=111732
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Fixes: a9877da2d629 ("drm/i915/oa: Reconfigure contexts on the fly")
Reviewed-by: Tvrtko Ursulin
Link: https://patchwork.freedesktop.org/patch/msgid/20191126065521.2331017-1-chris@chris-wilson.co.uk
(cherry picked from commit 58b4c1a07ada7fb91ea757bdb9bd47df02207357)
Signed-off-by: Joonas Lahtinen
---
 drivers/gpu/drm/i915/gt/intel_context.c | 21 +++++++++++++++++----
 1 file changed, 17 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c
index ee9d2bcd2c13..ef7bc41ffffa 100644
--- a/drivers/gpu/drm/i915/gt/intel_context.c
+++ b/drivers/gpu/drm/i915/gt/intel_context.c
@@ -310,10 +310,23 @@ int intel_context_prepare_remote_request(struct intel_context *ce,
 	GEM_BUG_ON(rq->hw_context == ce);
 
 	if (rcu_access_pointer(rq->timeline) != tl) { /* timeline sharing! */
-		err = mutex_lock_interruptible_nested(&tl->mutex,
-						      SINGLE_DEPTH_NESTING);
-		if (err)
-			return err;
+		/*
+		 * Ideally, we just want to insert our foreign fence as
+		 * a barrier into the remove context, such that this operation
+		 * occurs after all current operations in that context, and
+		 * all future operations must occur after this.
+		 *
+		 * Currently, the timeline->last_request tracking is guarded
+		 * by its mutex and so we must obtain that to atomically
+		 * insert our barrier. However, since we already hold our
+		 * timeline->mutex, we must be careful against potential
+		 * inversion if we are the kernel_context as the remote context
+		 * will itself poke at the kernel_context when it needs to
+		 * unpin. Ergo, if already locked, we drop both locks and
+		 * try again (through the magic of userspace repeating EAGAIN).
+		 */
+		if (!mutex_trylock(&tl->mutex))
+			return -EAGAIN;
 
 		/* Queue this switch after current activity by this context. */
 		err = i915_active_fence_set(&tl->last_request, rq);
-- 
2.25.1
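
For context, the back-off pattern the patch adopts can be illustrated with the
standalone sketch below. It uses pthread mutexes rather than kernel mutexes,
and the names (local_lock, remote_lock, prepare_remote) are hypothetical, not
i915 code; it only shows the trylock-and-retry shape that avoids the AB-BA
deadlock described above.

```c
/*
 * Illustrative sketch of trylock back-off against AB-BA inversion.
 * One path takes local then remote, another may take remote then
 * local; taking the second lock unconditionally could deadlock, so
 * the second acquisition is a trylock and contention is reported as
 * -EAGAIN for the caller to retry.
 */
#include <errno.h>
#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t local_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t remote_lock = PTHREAD_MUTEX_INITIALIZER;

/* Returns 0 on success, -EAGAIN if the remote lock was contended. */
static int prepare_remote(void)
{
	int err = 0;

	pthread_mutex_lock(&local_lock);

	if (pthread_mutex_trylock(&remote_lock)) {
		/* Contended: drop what we hold and ask the caller to retry. */
		err = -EAGAIN;
		goto out_local;
	}

	/* ... do the work that needs both locks held ... */

	pthread_mutex_unlock(&remote_lock);
out_local:
	pthread_mutex_unlock(&local_lock);
	return err;
}

int main(void)
{
	/* The caller simply repeats the operation on -EAGAIN. */
	while (prepare_remote() == -EAGAIN)
		;
	printf("done\n");
	return 0;
}
```

In the patch itself the retry loop is not in the kernel: -EAGAIN propagates
back out of intel_context_prepare_remote_request() and userspace repeats the
operation, as the added comment notes.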