drm/i915/gt: Eliminate the trylock for reading a timeline's hwsp
author: Chris Wilson <chris@chris-wilson.co.uk>
Tue, 17 Dec 2019 01:16:59 +0000 (01:16 +0000)
committer: Chris Wilson <chris@chris-wilson.co.uk>
Tue, 17 Dec 2019 16:59:48 +0000 (16:59 +0000)
As we stash a pointer to the HWSP cacheline on the request, when reading
it we only need to confirm that the cacheline is still valid by checking
that the request and timeline are still intact.

v2: Protect hwsp_cacheline with RCU

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20191217011659.3092130-1-chris@chris-wilson.co.uk
drivers/gpu/drm/i915/gt/intel_timeline.c
drivers/gpu/drm/i915/gt/intel_timeline_types.h
drivers/gpu/drm/i915/i915_request.c
drivers/gpu/drm/i915/i915_request.h

index d71aafb66d6ef451faaf45542f907e2a5b3db0e9..ee5dc4fbdeb9ab689c6e73dffc729699548b2818 100644 (file)
@@ -15,6 +15,9 @@
 #define ptr_set_bit(ptr, bit) ((typeof(ptr))((unsigned long)(ptr) | BIT(bit)))
 #define ptr_test_bit(ptr, bit) ((unsigned long)(ptr) & BIT(bit))
 
+#define CACHELINE_BITS 6
+#define CACHELINE_FREE CACHELINE_BITS
+
 struct intel_timeline_hwsp {
        struct intel_gt *gt;
        struct intel_gt_timelines *gt_timelines;
@@ -23,14 +26,6 @@ struct intel_timeline_hwsp {
        u64 free_bitmap;
 };
 
-struct intel_timeline_cacheline {
-       struct i915_active active;
-       struct intel_timeline_hwsp *hwsp;
-       void *vaddr;
-#define CACHELINE_BITS 6
-#define CACHELINE_FREE CACHELINE_BITS
-};
-
 static struct i915_vma *__hwsp_alloc(struct intel_gt *gt)
 {
        struct drm_i915_private *i915 = gt->i915;
@@ -133,7 +128,7 @@ static void __idle_cacheline_free(struct intel_timeline_cacheline *cl)
        __idle_hwsp_free(cl->hwsp, ptr_unmask_bits(cl->vaddr, CACHELINE_BITS));
 
        i915_active_fini(&cl->active);
-       kfree(cl);
+       kfree_rcu(cl, rcu);
 }
 
 __i915_active_call
@@ -514,46 +509,35 @@ int intel_timeline_read_hwsp(struct i915_request *from,
                             struct i915_request *to,
                             u32 *hwsp)
 {
-       struct intel_timeline *tl;
+       struct intel_timeline_cacheline *cl;
        int err;
 
+       GEM_BUG_ON(!rcu_access_pointer(from->hwsp_cacheline));
+
        rcu_read_lock();
-       tl = rcu_dereference(from->timeline);
-       if (i915_request_completed(from) || !kref_get_unless_zero(&tl->kref))
-               tl = NULL;
+       cl = rcu_dereference(from->hwsp_cacheline);
+       if (unlikely(!i915_active_acquire_if_busy(&cl->active)))
+               goto unlock; /* seqno wrapped and completed! */
+       if (unlikely(i915_request_completed(from)))
+               goto release;
        rcu_read_unlock();
-       if (!tl) /* already completed */
-               return 1;
 
-       GEM_BUG_ON(rcu_access_pointer(to->timeline) == tl);
-
-       err = -EAGAIN;
-       if (mutex_trylock(&tl->mutex)) {
-               struct intel_timeline_cacheline *cl = from->hwsp_cacheline;
-
-               if (i915_request_completed(from)) {
-                       err = 1;
-                       goto unlock;
-               }
+       err = cacheline_ref(cl, to);
+       if (err)
+               goto out;
 
-               err = cacheline_ref(cl, to);
-               if (err)
-                       goto unlock;
+       *hwsp = i915_ggtt_offset(cl->hwsp->vma) +
+               ptr_unmask_bits(cl->vaddr, CACHELINE_BITS) * CACHELINE_BYTES;
 
-               if (likely(cl == tl->hwsp_cacheline)) {
-                       *hwsp = tl->hwsp_offset;
-               } else { /* across a seqno wrap, recover the original offset */
-                       *hwsp = i915_ggtt_offset(cl->hwsp->vma) +
-                               ptr_unmask_bits(cl->vaddr, CACHELINE_BITS) *
-                               CACHELINE_BYTES;
-               }
+out:
+       i915_active_release(&cl->active);
+       return err;
 
+release:
+       i915_active_release(&cl->active);
 unlock:
-               mutex_unlock(&tl->mutex);
-       }
-       intel_timeline_put(tl);
-
-       return err;
+       rcu_read_unlock();
+       return 1;
 }
 
 void intel_timeline_unpin(struct intel_timeline *tl)
index aaf15cbe1ce18526c6f8794957785b758d33a149..24d040f14e8965cce3d7a7826e2e5776cd43d56d 100644 (file)
 #include <linux/list.h>
 #include <linux/kref.h>
 #include <linux/mutex.h>
+#include <linux/rcupdate.h>
 #include <linux/types.h>
 
 #include "i915_active_types.h"
 
 struct drm_i915_private;
 struct i915_vma;
-struct intel_timeline_cacheline;
 struct i915_syncmap;
+struct intel_timeline_hwsp;
 
 struct intel_timeline {
        u64 fence_context;
@@ -87,4 +88,13 @@ struct intel_timeline {
        struct rcu_head rcu;
 };
 
+struct intel_timeline_cacheline {
+       struct i915_active active;
+
+       struct intel_timeline_hwsp *hwsp;
+       void *vaddr;
+
+       struct rcu_head rcu;
+};
+
 #endif /* __I915_TIMELINE_TYPES_H__ */
index a59b803aef92c9a32e7beb5f2f6010e186c7592f..269470d3527a05bf7e3b13f016683d427758bf4e 100644 (file)
@@ -655,9 +655,9 @@ __i915_request_create(struct intel_context *ce, gfp_t gfp)
        rq->execution_mask = ce->engine->mask;
        rq->flags = 0;
 
-       rcu_assign_pointer(rq->timeline, tl);
+       RCU_INIT_POINTER(rq->timeline, tl);
+       RCU_INIT_POINTER(rq->hwsp_cacheline, tl->hwsp_cacheline);
        rq->hwsp_seqno = tl->hwsp_seqno;
-       rq->hwsp_cacheline = tl->hwsp_cacheline;
 
        rq->rcustate = get_state_synchronize_rcu(); /* acts as smp_mb() */
 
index a561b8efe8698b0f27c3a7ea012a4d9c2d381fcc..aa38290eea3d416e080b2df5be6c94565df8b193 100644 (file)
@@ -30,6 +30,7 @@
 
 #include "gt/intel_context_types.h"
 #include "gt/intel_engine_types.h"
+#include "gt/intel_timeline_types.h"
 
 #include "i915_gem.h"
 #include "i915_scheduler.h"
@@ -41,8 +42,6 @@
 struct drm_file;
 struct drm_i915_gem_object;
 struct i915_request;
-struct intel_timeline;
-struct intel_timeline_cacheline;
 
 struct i915_capture_list {
        struct i915_capture_list *next;
@@ -183,7 +182,7 @@ struct i915_request {
         * inside the timeline's HWSP vma, but it is only valid while this
         * request has not completed and guarded by the timeline mutex.
         */
-       struct intel_timeline_cacheline *hwsp_cacheline;
+       struct intel_timeline_cacheline __rcu *hwsp_cacheline;
 
        /** Position in the ring of the start of the request */
        u32 head;