drm/i915: Allow sharing the idle-barrier from other kernel requests
[linux-2.6-block.git] / drivers / gpu / drm / i915 / i915_active.c
index d32db8a4db5cb9a91c9c812e29671b214d5c226d..1e09722b5317b547a7322e7ec4eef0adac0879c5 100644 (file)
@@ -33,6 +33,38 @@ struct active_node {
        u64 timeline;
 };
 
+static inline struct active_node *
+node_from_active(struct i915_active_request *active)
+{
+       return container_of(active, struct active_node, base);
+}
+
+#define take_preallocated_barriers(x) llist_del_all(&(x)->preallocated_barriers)
+
+static inline bool is_barrier(const struct i915_active_request *active)
+{
+       return IS_ERR(rcu_access_pointer(active->request));
+}
+
+static inline struct llist_node *barrier_to_ll(struct active_node *node)
+{
+       GEM_BUG_ON(!is_barrier(&node->base));
+       return (struct llist_node *)&node->base.link;
+}
+
+static inline struct intel_engine_cs *
+barrier_to_engine(struct active_node *node)
+{
+       GEM_BUG_ON(!is_barrier(&node->base));
+       return (struct intel_engine_cs *)node->base.link.prev;
+}
+
+static inline struct active_node *barrier_from_ll(struct llist_node *x)
+{
+       return container_of((struct list_head *)x,
+                           struct active_node, base.link);
+}
+
 #if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM) && IS_ENABLED(CONFIG_DEBUG_OBJECTS)
 
 static void *active_debug_hint(void *addr)
@@ -127,7 +159,7 @@ active_retire(struct i915_active *ref)
 static void
 node_retire(struct i915_active_request *base, struct i915_request *rq)
 {
-       active_retire(container_of(base, struct active_node, base)->ref);
+       active_retire(node_from_active(base)->ref);
 }
 
 static struct i915_active_request *
@@ -184,6 +216,7 @@ out:
        ref->cache = node;
        mutex_unlock(&ref->mutex);
 
+       BUILD_BUG_ON(offsetof(typeof(*node), base));
        return &node->base;
 }
 
@@ -201,11 +234,52 @@ void __i915_active_init(struct drm_i915_private *i915,
        ref->retire = retire;
        ref->tree = RB_ROOT;
        ref->cache = NULL;
-       init_llist_head(&ref->barriers);
+       init_llist_head(&ref->preallocated_barriers);
        atomic_set(&ref->count, 0);
        __mutex_init(&ref->mutex, "i915_active", key);
 }
 
+static bool __active_del_barrier(struct i915_active *ref,
+                                struct active_node *node)
+{
+       struct intel_engine_cs *engine = barrier_to_engine(node);
+       struct llist_node *head = NULL, *tail = NULL;
+       struct llist_node *pos, *next;
+
+       GEM_BUG_ON(node->timeline != engine->kernel_context->ring->timeline->fence_context);
+
+       /*
+        * Rebuild the llist excluding our node. We may perform this
+        * outside of the kernel_context timeline mutex and so someone
+        * else may be manipulating the engine->barrier_tasks, in
+        * which case either we or they will be upset :)
+        *
+        * A second __active_del_barrier() will report failure to claim
+        * the active_node and the caller will just shrug and know not to
+        * claim ownership of its node.
+        *
+        * A concurrent i915_request_add_active_barriers() will miss adding
+        * any of the tasks, but we will try again on the next -- and since
+        * we are actively using the barrier, we know that there will be
+        * at least another opportunity when we idle.
+        */
+       llist_for_each_safe(pos, next, llist_del_all(&engine->barrier_tasks)) {
+               if (node == barrier_from_ll(pos)) {
+                       node = NULL;
+                       continue;
+               }
+
+               pos->next = head;
+               head = pos;
+               if (!tail)
+                       tail = pos;
+       }
+       if (head)
+               llist_add_batch(head, tail, &engine->barrier_tasks);
+
+       return !node;
+}
+
 int i915_active_ref(struct i915_active *ref,
                    u64 timeline,
                    struct i915_request *rq)
@@ -224,8 +298,20 @@ int i915_active_ref(struct i915_active *ref,
                goto out;
        }
 
-       if (!i915_active_request_isset(active))
-               atomic_inc(&ref->count);
+       if (is_barrier(active)) { /* proto-node used by our idle barrier */
+               /*
+                * This request is on the kernel_context timeline, and so
+                * we can use it to substitute for the pending idle-barrier
+                * request that we want to emit on the kernel_context.
+                */
+               __active_del_barrier(ref, node_from_active(active));
+               RCU_INIT_POINTER(active->request, NULL);
+               INIT_LIST_HEAD(&active->link);
+       } else {
+               if (!i915_active_request_isset(active))
+                       atomic_inc(&ref->count);
+       }
+       GEM_BUG_ON(!atomic_read(&ref->count));
        __i915_active_request_set(active, rq);
 
 out:
@@ -312,6 +398,11 @@ int i915_active_wait(struct i915_active *ref)
        }
 
        rbtree_postorder_for_each_entry_safe(it, n, &ref->tree, node) {
+               if (is_barrier(&it->base)) { /* unconnected idle-barrier */
+                       err = -EBUSY;
+                       break;
+               }
+
                err = i915_active_request_retire(&it->base, BKL(ref));
                if (err)
                        break;
@@ -374,6 +465,92 @@ void i915_active_fini(struct i915_active *ref)
 }
 #endif
 
+static inline bool is_idle_barrier(struct active_node *node, u64 idx)
+{
+       return node->timeline == idx && !i915_active_request_isset(&node->base);
+}
+
+static struct active_node *reuse_idle_barrier(struct i915_active *ref, u64 idx)
+{
+       struct rb_node *prev, *p;
+
+       if (RB_EMPTY_ROOT(&ref->tree))
+               return NULL;
+
+       mutex_lock(&ref->mutex);
+       GEM_BUG_ON(i915_active_is_idle(ref));
+
+       /*
+        * Try to reuse any existing barrier nodes already allocated for this
+        * i915_active, due to overlapping active phases there is likely a
+        * node kept alive (as we reuse before parking). We prefer to reuse
+        * completely idle barriers (less hassle in manipulating the llists),
+        * but otherwise any will do.
+        */
+       if (ref->cache && is_idle_barrier(ref->cache, idx)) {
+               p = &ref->cache->node;
+               goto match;
+       }
+
+       prev = NULL;
+       p = ref->tree.rb_node;
+       while (p) {
+               struct active_node *node =
+                       rb_entry(p, struct active_node, node);
+
+               if (is_idle_barrier(node, idx))
+                       goto match;
+
+               prev = p;
+               if (node->timeline < idx)
+                       p = p->rb_right;
+               else
+                       p = p->rb_left;
+       }
+
+       /*
+        * No quick match, but we did find the leftmost rb_node for the
+        * kernel_context. Walk the rb_tree in-order to see if there were
+        * any idle-barriers on this timeline that we missed, or just use
+        * the first pending barrier.
+        */
+       for (p = prev; p; p = rb_next(p)) {
+               struct active_node *node =
+                       rb_entry(p, struct active_node, node);
+
+               if (node->timeline > idx)
+                       break;
+
+               if (node->timeline < idx)
+                       continue;
+
+               if (is_idle_barrier(node, idx))
+                       goto match;
+
+               /*
+                * The list of pending barriers is protected by the
+                * kernel_context timeline, which notably we do not hold
+                * here. i915_request_add_active_barriers() may consume
+                * the barrier before we claim it, so we have to check
+                * for success.
+                */
+               if (is_barrier(&node->base) && __active_del_barrier(ref, node))
+                       goto match;
+       }
+
+       mutex_unlock(&ref->mutex);
+
+       return NULL;
+
+match:
+       rb_erase(p, &ref->tree); /* Hide from waits and sibling allocations */
+       if (p == &ref->cache->node)
+               ref->cache = NULL;
+       mutex_unlock(&ref->mutex);
+
+       return rb_entry(p, struct active_node, node);
+}
+
 int i915_active_acquire_preallocate_barrier(struct i915_active *ref,
                                            struct intel_engine_cs *engine)
 {
@@ -382,39 +559,61 @@ int i915_active_acquire_preallocate_barrier(struct i915_active *ref,
        struct llist_node *pos, *next;
        int err;
 
-       GEM_BUG_ON(!mask);
+       GEM_BUG_ON(!llist_empty(&ref->preallocated_barriers));
+
+       /*
+        * Preallocate a node for each physical engine supporting the target
+        * engine (remember virtual engines have more than one sibling).
+        * We can then use the preallocated nodes in
+        * i915_active_acquire_barrier()
+        */
        for_each_engine_masked(engine, i915, mask, tmp) {
-               struct intel_context *kctx = engine->kernel_context;
+               u64 idx = engine->kernel_context->ring->timeline->fence_context;
                struct active_node *node;
 
-               node = kmem_cache_alloc(global.slab_cache, GFP_KERNEL);
-               if (unlikely(!node)) {
-                       err = -ENOMEM;
-                       goto unwind;
+               node = reuse_idle_barrier(ref, idx);
+               if (!node) {
+                       node = kmem_cache_alloc(global.slab_cache, GFP_KERNEL);
+                       if (!node) {
+                               err = -ENOMEM; /* errno codes are returned negative */
+                               goto unwind;
+                       }
+
+                       RCU_INIT_POINTER(node->base.request, NULL);
+                       node->base.retire = node_retire;
+                       node->timeline = idx;
+                       node->ref = ref;
                 }
 
-               i915_active_request_init(&node->base,
-                                        (void *)engine, node_retire);
-               node->timeline = kctx->ring->timeline->fence_context;
-               node->ref = ref;
-               atomic_inc(&ref->count);
+               if (!i915_active_request_isset(&node->base)) {
+                       /*
+                        * Mark this as being *our* unconnected proto-node.
+                        *
+                        * Since this node is not in any list, and we have
+                        * decoupled it from the rbtree, we can reuse the
+                        * request to indicate this is an idle-barrier node
+                        * and then we can use the rb_node and list pointers
+                        * for our tracking of the pending barrier.
+                        */
+                       RCU_INIT_POINTER(node->base.request, ERR_PTR(-EAGAIN));
+                       node->base.link.prev = (void *)engine;
+                       atomic_inc(&ref->count);
+               }
 
+               GEM_BUG_ON(barrier_to_engine(node) != engine);
+               llist_add(barrier_to_ll(node), &ref->preallocated_barriers);
                intel_engine_pm_get(engine);
-               llist_add((struct llist_node *)&node->base.link,
-                         &ref->barriers);
        }
 
        return 0;
 
 unwind:
-       llist_for_each_safe(pos, next, llist_del_all(&ref->barriers)) {
-               struct active_node *node;
+       llist_for_each_safe(pos, next, take_preallocated_barriers(ref)) {
+               struct active_node *node = barrier_from_ll(pos);
 
-               node = container_of((struct list_head *)pos,
-                                   typeof(*node), base.link);
-               engine = (void *)rcu_access_pointer(node->base.request);
+               atomic_dec(&ref->count);
+               intel_engine_pm_put(barrier_to_engine(node));
 
-               intel_engine_pm_put(engine);
                kmem_cache_free(global.slab_cache, node);
        }
        return err;
@@ -426,25 +625,27 @@ void i915_active_acquire_barrier(struct i915_active *ref)
 
        GEM_BUG_ON(i915_active_is_idle(ref));
 
+       /*
+        * Transfer the list of preallocated barriers into the
+        * i915_active rbtree, but only as proto-nodes. They will be
+        * populated by i915_request_add_active_barriers() to point to the
+        * request that will eventually release them.
+        */
        mutex_lock_nested(&ref->mutex, SINGLE_DEPTH_NESTING);
-       llist_for_each_safe(pos, next, llist_del_all(&ref->barriers)) {
-               struct intel_engine_cs *engine;
-               struct active_node *node;
+       llist_for_each_safe(pos, next, take_preallocated_barriers(ref)) {
+               struct active_node *node = barrier_from_ll(pos);
+               struct intel_engine_cs *engine = barrier_to_engine(node);
                struct rb_node **p, *parent;
 
-               node = container_of((struct list_head *)pos,
-                                   typeof(*node), base.link);
-
-               engine = (void *)rcu_access_pointer(node->base.request);
-               RCU_INIT_POINTER(node->base.request, ERR_PTR(-EAGAIN));
-
                parent = NULL;
                p = &ref->tree.rb_node;
                while (*p) {
+                       struct active_node *it;
+
                        parent = *p;
-                       if (rb_entry(parent,
-                                    struct active_node,
-                                    node)->timeline < node->timeline)
+
+                       it = rb_entry(parent, struct active_node, node);
+                       if (it->timeline < node->timeline)
                                p = &parent->rb_right;
                        else
                                p = &parent->rb_left;
@@ -452,20 +653,29 @@ void i915_active_acquire_barrier(struct i915_active *ref)
                rb_link_node(&node->node, parent, p);
                rb_insert_color(&node->node, &ref->tree);
 
-               llist_add((struct llist_node *)&node->base.link,
-                         &engine->barrier_tasks);
+               llist_add(barrier_to_ll(node), &engine->barrier_tasks);
                intel_engine_pm_put(engine);
        }
        mutex_unlock(&ref->mutex);
 }
 
-void i915_request_add_barriers(struct i915_request *rq)
+void i915_request_add_active_barriers(struct i915_request *rq)
 {
        struct intel_engine_cs *engine = rq->engine;
        struct llist_node *node, *next;
 
-       llist_for_each_safe(node, next, llist_del_all(&engine->barrier_tasks))
+       GEM_BUG_ON(intel_engine_is_virtual(engine));
+       GEM_BUG_ON(rq->timeline != engine->kernel_context->ring->timeline);
+
+       /*
+        * Attach the list of proto-fences to the in-flight request such
+        * that the parent i915_active will be released when this request
+        * is retired.
+        */
+       llist_for_each_safe(node, next, llist_del_all(&engine->barrier_tasks)) {
+               RCU_INIT_POINTER(barrier_from_ll(node)->base.request, rq);
                list_add_tail((struct list_head *)node, &rq->active_list);
+       }
 }
 
 int i915_active_request_set(struct i915_active_request *active,