drm/i915: Store the BIT(engine->id) as the engine's mask

diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 485b259127c36fdb4aeb619559e790f394ff412a..943a221acb21a30e8d913fe458d6333e49b37f13 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -31,7 +31,6 @@
 #include <linux/sync_file.h>
 #include <linux/uaccess.h>
 
-#include <drm/drmP.h>
 #include <drm/drm_syncobj.h>
 #include <drm/i915_drm.h>
 
@@ -754,6 +753,68 @@ static int eb_select_context(struct i915_execbuffer *eb)
        return 0;
 }
 
+static struct i915_request *__eb_wait_for_ring(struct intel_ring *ring)
+{
+       struct i915_request *rq;
+
+       /*
+        * Completely unscientific finger-in-the-air estimates for suitable
+        * maximum user request size (to avoid blocking) and then backoff.
+        */
+       if (intel_ring_update_space(ring) >= PAGE_SIZE)
+               return NULL;
+
+       /*
+        * Find a request after whose completion there will be at least half
+        * the ring available. The hysteresis allows us to compete for the
+        * shared ring and should mean that we sleep less often prior to
+        * claiming our resources, but not so long that the ring completely
+        * drains before we can submit our next request.
+        */
+       list_for_each_entry(rq, &ring->request_list, ring_link) {
+               if (__intel_ring_space(rq->postfix,
+                                      ring->emit, ring->size) > ring->size / 2)
+                       break;
+       }
+       if (&rq->ring_link == &ring->request_list)
+               return NULL; /* weird, we will check again later for real */
+
+       return i915_request_get(rq);
+}
+
+static int eb_wait_for_ring(const struct i915_execbuffer *eb)
+{
+       const struct intel_context *ce;
+       struct i915_request *rq;
+       int ret = 0;
+
+       /*
+        * Apply a light amount of backpressure to prevent excessive hogs
+        * from blocking on ring space whilst holding struct_mutex and
+        * keeping all of their resources pinned.
+        */
+
+       ce = to_intel_context(eb->ctx, eb->engine);
+       if (!ce->ring) /* first use, assume empty! */
+               return 0;
+
+       rq = __eb_wait_for_ring(ce->ring);
+       if (rq) {
+               mutex_unlock(&eb->i915->drm.struct_mutex);
+
+               if (i915_request_wait(rq,
+                                     I915_WAIT_INTERRUPTIBLE,
+                                     MAX_SCHEDULE_TIMEOUT) < 0)
+                       ret = -EINTR;
+
+               i915_request_put(rq);
+
+               mutex_lock(&eb->i915->drm.struct_mutex);
+       }
+
+       return ret;
+}
+
 static int eb_lookup_vmas(struct i915_execbuffer *eb)
 {
        struct radix_tree_root *handles_vma = &eb->ctx->handles_vma;
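
The selection loop above hinges on __intel_ring_space(), which computes how
much of the power-of-two ring would be free once a given request's postfix
has been retired. A minimal userspace sketch of the heuristic (the ring
size, emit pointer and postfix values are made up; the cacheline of slack
matches the kernel's convention that head must never exactly catch tail):

        #include <stdio.h>

        #define CACHELINE_BYTES 64

        /* Space free in a power-of-two ring once head reaches 'postfix',
         * given the current emit pointer 'tail'. */
        static unsigned int ring_space(unsigned int postfix,
                                       unsigned int tail, unsigned int size)
        {
                return (postfix - tail - CACHELINE_BYTES) & (size - 1);
        }

        int main(void)
        {
                const unsigned int size = 4096, emit = 3800;
                const unsigned int postfix[] = { 256, 1024, 2048, 3072 };

                for (unsigned int i = 0; i < 4; i++) {
                        unsigned int space = ring_space(postfix[i], emit, size);
                        printf("wait for rq%u -> %4u bytes free%s\n",
                               i, space,
                               space > size / 2 ? "  (> half: candidate)" : "");
                }
                return 0;
        }

Retiring an older request advances head less far and so frees less of the
ring, which is why the kernel loop walks the request list oldest-first and
breaks at the first request whose retirement leaves more than half free.
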
@@ -788,12 +849,12 @@ static int eb_lookup_vmas(struct i915_execbuffer *eb)
                }
 
                vma = i915_vma_instance(obj, eb->vm, NULL);
-               if (unlikely(IS_ERR(vma))) {
+               if (IS_ERR(vma)) {
                        err = PTR_ERR(vma);
                        goto err_obj;
                }
 
-               lut = kmem_cache_alloc(eb->i915->luts, GFP_KERNEL);
+               lut = i915_lut_handle_alloc();
                if (unlikely(!lut)) {
                        err = -ENOMEM;
                        goto err_obj;
@@ -801,7 +862,7 @@ static int eb_lookup_vmas(struct i915_execbuffer *eb)
 
                err = radix_tree_insert(handles_vma, handle, vma);
                if (unlikely(err)) {
-                       kmem_cache_free(eb->i915->luts, lut);
+                       i915_lut_handle_free(lut);
                        goto err_obj;
                }
 
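i915_lut_handle_alloc() and i915_lut_handle_free() replace the open-coded
kmem_cache calls against eb->i915->luts. Presumably they are thin wrappers
around a dedicated slab, along these lines (a sketch only; the cache
variable name here is illustrative):

        #include <linux/slab.h>

        static struct kmem_cache *slab_luts;    /* e.g. set up via KMEM_CACHE() */

        struct i915_lut_handle *i915_lut_handle_alloc(void)
        {
                return kmem_cache_alloc(slab_luts, GFP_KERNEL);
        }

        void i915_lut_handle_free(struct i915_lut_handle *lut)
        {
                kmem_cache_free(slab_luts, lut);
        }

This decouples the LUT allocator from struct drm_i915_private, so callers
no longer need an i915 pointer just to allocate or free a handle.
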
@@ -1380,7 +1441,7 @@ eb_relocate_entry(struct i915_execbuffer *eb,
                 * batchbuffers.
                 */
                if (reloc->write_domain == I915_GEM_DOMAIN_INSTRUCTION &&
-                   IS_GEN6(eb->i915)) {
+                   IS_GEN(eb->i915, 6)) {
                        err = i915_vma_bind(target, target->obj->cache_level,
                                            PIN_GLOBAL);
                        if (WARN_ONCE(err,
@@ -1896,7 +1957,7 @@ static int i915_reset_gen7_sol_offsets(struct i915_request *rq)
        u32 *cs;
        int i;
 
-       if (!IS_GEN7(rq->i915) || rq->engine->id != RCS) {
+       if (!IS_GEN(rq->i915, 7) || rq->engine->id != RCS0) {
                DRM_DEBUG("sol reset is gen7/rcs only\n");
                return -EINVAL;
        }
@@ -1977,6 +2038,18 @@ static int eb_submit(struct i915_execbuffer *eb)
                        return err;
        }
 
+       /*
+        * Once we have finished waiting upon other engines (using HW
+        * semaphores), we can signal that this request/batch is ready to
+        * run. This allows us to determine whether the batch is still
+        * waiting on the GPU or actually running, by checking the
+        * breadcrumb.
+        */
+       if (eb->engine->emit_init_breadcrumb) {
+               err = eb->engine->emit_init_breadcrumb(eb->request);
+               if (err)
+                       return err;
+       }
+
        err = eb->engine->emit_bb_start(eb->request,
                                        eb->batch->node.start +
                                        eb->batch_start_offset,
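
With the init breadcrumb emitted ahead of the payload, the engine's status
page can distinguish three phases of a request. A hedged userspace sketch
of that classification, assuming the i915 convention that the init
breadcrumb writes seqno - 1 and the final breadcrumb writes seqno (the
wrap-safe compare mirrors i915_seqno_passed(); classify() is illustrative):

        #include <stdio.h>
        #include <stdint.h>

        /* Wrap-safe "has a passed b?", as i915_seqno_passed() does it. */
        static int seqno_passed(uint32_t a, uint32_t b)
        {
                return (int32_t)(a - b) >= 0;
        }

        enum phase { QUEUED, RUNNING, COMPLETE };

        static enum phase classify(uint32_t hwsp, uint32_t seqno)
        {
                if (seqno_passed(hwsp, seqno))
                        return COMPLETE;        /* final breadcrumb seen */
                if (seqno_passed(hwsp, seqno - 1))
                        return RUNNING;         /* init breadcrumb seen */
                return QUEUED;                  /* still waiting on semaphores */
        }

        int main(void)
        {
                /* request with seqno 10: hwsp 8 queued, 9 running, 10 done */
                printf("%d %d %d\n", classify(8, 10), classify(9, 10),
                       classify(10, 10));
                return 0;
        }
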
@@ -2009,11 +2082,11 @@ gen8_dispatch_bsd_engine(struct drm_i915_private *dev_priv,
 #define I915_USER_RINGS (4)
 
 static const enum intel_engine_id user_ring_map[I915_USER_RINGS + 1] = {
-       [I915_EXEC_DEFAULT]     = RCS,
-       [I915_EXEC_RENDER]      = RCS,
-       [I915_EXEC_BLT]         = BCS,
-       [I915_EXEC_BSD]         = VCS,
-       [I915_EXEC_VEBOX]       = VECS
+       [I915_EXEC_DEFAULT]     = RCS0,
+       [I915_EXEC_RENDER]      = RCS0,
+       [I915_EXEC_BLT]         = BCS0,
+       [I915_EXEC_BSD]         = VCS0,
+       [I915_EXEC_VEBOX]       = VECS0
 };
 
 static struct intel_engine_cs *
@@ -2036,7 +2109,7 @@ eb_select_engine(struct drm_i915_private *dev_priv,
                return NULL;
        }
 
-       if (user_ring_id == I915_EXEC_BSD && HAS_BSD2(dev_priv)) {
+       if (user_ring_id == I915_EXEC_BSD && HAS_ENGINE(dev_priv, VCS1)) {
                unsigned int bsd_idx = args->flags & I915_EXEC_BSD_MASK;
 
                if (bsd_idx == I915_EXEC_BSD_DEFAULT) {
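
HAS_BSD2() becomes a plain engine-mask test, which is the point of the
subject line: with engine->mask == BIT(engine->id), presence checks reduce
to bit operations. Roughly (a sketch of the scheme, not the exact i915
macros; the example mask is made up):

        enum intel_engine_id { RCS0, BCS0, VCS0, VCS1, VECS0 };

        #define BIT(n)                  (1u << (n))
        #define HAS_ENGINE(mask, id)    ((mask) & BIT(id))

        /* e.g. a part with render, blitter and a single BSD engine: */
        static const unsigned int engine_mask =
                BIT(RCS0) | BIT(BCS0) | BIT(VCS0) | BIT(VECS0);

        /* HAS_ENGINE(engine_mask, VCS1) == 0: no second BSD ring, so the
         * I915_EXEC_BSD round-robin path below is skipped. */
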
@@ -2203,6 +2276,7 @@ i915_gem_do_execbuffer(struct drm_device *dev,
        struct i915_execbuffer eb;
        struct dma_fence *in_fence = NULL;
        struct sync_file *out_fence = NULL;
+       intel_wakeref_t wakeref;
        int out_fence_fd = -1;
        int err;
 
@@ -2238,10 +2312,6 @@ i915_gem_do_execbuffer(struct drm_device *dev,
        if (args->flags & I915_EXEC_IS_PINNED)
                eb.batch_flags |= I915_DISPATCH_PINNED;
 
-       eb.engine = eb_select_engine(eb.i915, file, args);
-       if (!eb.engine)
-               return -EINVAL;
-
        if (args->flags & I915_EXEC_FENCE_IN) {
                in_fence = sync_file_get_fence(lower_32_bits(args->rsvd2));
                if (!in_fence)
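
For reference, the fence plumbing here is driven from userspace through
args->rsvd2: the input fence fd sits in the low 32 bits, and with
I915_EXEC_FENCE_OUT the output fence fd comes back in the high 32 bits
(hence the _WR ioctl variant). A sketch of a caller, with buffer setup and
error handling elided:

        #include <stdint.h>
        #include <sys/ioctl.h>
        #include <drm/i915_drm.h>

        /* Submit with an input fence and collect the output fence fd. */
        static int submit_fenced(int drm_fd, int in_fence_fd,
                                 struct drm_i915_gem_execbuffer2 *execbuf)
        {
                execbuf->flags |= I915_EXEC_FENCE_IN | I915_EXEC_FENCE_OUT;
                execbuf->rsvd2 = (uint32_t)in_fence_fd;        /* lower_32_bits */

                if (ioctl(drm_fd, DRM_IOCTL_I915_GEM_EXECBUFFER2_WR, execbuf))
                        return -1;

                return (int)(execbuf->rsvd2 >> 32);            /* upper_32_bits */
        }
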
@@ -2266,6 +2336,12 @@ i915_gem_do_execbuffer(struct drm_device *dev,
        if (unlikely(err))
                goto err_destroy;
 
+       eb.engine = eb_select_engine(eb.i915, file, args);
+       if (!eb.engine) {
+               err = -EINVAL;
+               goto err_engine;
+       }
+
        /*
         * Take a local wakeref for preparing to dispatch the execbuf as
         * we expect to access the hardware fairly frequently in the
@@ -2273,12 +2349,16 @@ i915_gem_do_execbuffer(struct drm_device *dev,
         * wakeref that we hold until the GPU has been idle for at least
         * 100ms.
         */
-       intel_runtime_pm_get(eb.i915);
+       wakeref = intel_runtime_pm_get(eb.i915);
 
        err = i915_mutex_lock_interruptible(dev);
        if (err)
                goto err_rpm;
 
+       err = eb_wait_for_ring(&eb); /* may temporarily drop struct_mutex */
+       if (unlikely(err))
+               goto err_unlock;
+
        err = eb_relocate(&eb);
        if (err) {
                /*
@@ -2423,9 +2503,11 @@ err_batch_unpin:
 err_vma:
        if (eb.exec)
                eb_release_vmas(&eb);
+err_unlock:
        mutex_unlock(&dev->struct_mutex);
 err_rpm:
-       intel_runtime_pm_put(eb.i915);
+       intel_runtime_pm_put(eb.i915, wakeref);
+err_engine:
        i915_gem_context_put(eb.ctx);
 err_destroy:
        eb_destroy(&eb);
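
Finally, the runtime-pm conversion threads an intel_wakeref_t cookie from
each get to the matching put, so an unbalanced reference can be traced back
to its acquirer. A standalone sketch of the cookie idea (simplified; the
names are illustrative, and i915's real tracker records call sites rather
than keeping a bare counter):

        #include <assert.h>
        #include <stdio.h>

        typedef unsigned long wakeref_t;

        static int pm_count;

        static wakeref_t pm_get(void)
        {
                pm_count++;
                /* a real tracker hands back a unique, logged cookie */
                return (wakeref_t)__builtin_return_address(0);
        }

        static void pm_put(wakeref_t wref)
        {
                assert(wref);   /* catches a put that never had a get */
                pm_count--;
        }

        int main(void)
        {
                wakeref_t wakeref = pm_get();
                /* ... touch hardware ... */
                pm_put(wakeref);
                printf("balance: %d\n", pm_count);      /* 0 */
                return 0;
        }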