drm/i915: Store the BIT(engine->id) as the engine's mask

[linux-block.git] / drivers / gpu / drm / i915 / i915_gem_execbuffer.c
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c

index f250109e1f66300714a72c65ed637293a92a7888..943a221acb21a30e8d913fe458d6333e49b37f13 100644 (file)
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -753,6 +753,68 @@ static int eb_select_context(struct i915_execbuffer *eb)
         return 0;
  }
  
+static struct i915_request *__eb_wait_for_ring(struct intel_ring *ring)
+{
+       struct i915_request *rq;
+
+       /*
+        * Returns a request to wait upon (via i915_request_get()), or NULL if
+        * no wait is needed. The PAGE_SIZE cut-off is an unscientific estimate
+        * of the maximum user request size (to avoid blocking), then backoff.
+        */
+       if (intel_ring_update_space(ring) >= PAGE_SIZE)
+               return NULL;
+
+       /*
+        * Find a request that, once waited upon, leaves more than half the
+        * ring free. The hysteresis lets us compete for the shared ring and
+        * sleep less often before claiming our resources, but not so long
+        * that the ring completely drains before our next request.
+        */
+       list_for_each_entry(rq, &ring->request_list, ring_link) {
+               if (__intel_ring_space(rq->postfix,
+                                      ring->emit, ring->size) > ring->size / 2)
+                       break;
+       }
+       if (&rq->ring_link == &ring->request_list) /* loop found no match */
+               return NULL; /* weird, we will check again later for real */
+
+       return i915_request_get(rq);
+}
+
+static int eb_wait_for_ring(const struct i915_execbuffer *eb)
+{
+       const struct intel_context *ce;
+       struct i915_request *rq;
+       int ret = 0;
+
+       /*
+        * Apply light backpressure so that excessive hogs do not block waiting
+        * for ring space whilst holding struct_mutex with all their resources
+        * pinned. Returns 0, or -EINTR if the wait fails (mutex is retaken).
+        */
+
+       ce = to_intel_context(eb->ctx, eb->engine);
+       if (!ce->ring) /* first use, assume empty! */
+               return 0;
+
+       rq = __eb_wait_for_ring(ce->ring);
+       if (rq) {
+               mutex_unlock(&eb->i915->drm.struct_mutex);
+
+               if (i915_request_wait(rq,
+                                     I915_WAIT_INTERRUPTIBLE,
+                                     MAX_SCHEDULE_TIMEOUT) < 0)
+                       ret = -EINTR; /* any negative result maps here */
+
+               i915_request_put(rq); /* pairs with i915_request_get() */
+
+               mutex_lock(&eb->i915->drm.struct_mutex);
+       }
+
+       return ret;
+}
+
  static int eb_lookup_vmas(struct i915_execbuffer *eb)
  {
         struct radix_tree_root *handles_vma = &eb->ctx->handles_vma;
@@ -787,12 +849,12 @@ static int eb_lookup_vmas(struct i915_execbuffer *eb)
                 }
  
                 vma = i915_vma_instance(obj, eb->vm, NULL);
-               if (unlikely(IS_ERR(vma))) {
+               if (IS_ERR(vma)) {
                         err = PTR_ERR(vma);
                         goto err_obj;
                 }
  
-               lut = kmem_cache_alloc(eb->i915->luts, GFP_KERNEL);
+               lut = i915_lut_handle_alloc();
                 if (unlikely(!lut)) {
                         err = -ENOMEM;
                         goto err_obj;
@@ -800,7 +862,7 @@ static int eb_lookup_vmas(struct i915_execbuffer *eb)
  
                 err = radix_tree_insert(handles_vma, handle, vma);
                 if (unlikely(err)) {
-                       kmem_cache_free(eb->i915->luts, lut);
+                       i915_lut_handle_free(lut);
                         goto err_obj;
                 }
  
@@ -1895,7 +1957,7 @@ static int i915_reset_gen7_sol_offsets(struct i915_request *rq)
         u32 *cs;
         int i;
  
-       if (!IS_GEN(rq->i915, 7) || rq->engine->id != RCS) {
+       if (!IS_GEN(rq->i915, 7) || rq->engine->id != RCS0) {
                 DRM_DEBUG("sol reset is gen7/rcs only\n");
                 return -EINVAL;
         }
@@ -1976,6 +2038,18 @@ static int eb_submit(struct i915_execbuffer *eb)
                         return err;
         }
  
+       /*
+        * After we completed waiting for other engines (using HW semaphores)
+        * then we can signal that this request/batch is ready to run. This
+        * allows us to determine if the batch is still waiting on the GPU
+        * or actually running by checking the breadcrumb.
+        */
+       if (eb->engine->emit_init_breadcrumb) {
+               err = eb->engine->emit_init_breadcrumb(eb->request);
+               if (err)
+                       return err;
+       }
+
         err = eb->engine->emit_bb_start(eb->request,
                                         eb->batch->node.start +
                                         eb->batch_start_offset,
@@ -2008,11 +2082,11 @@ gen8_dispatch_bsd_engine(struct drm_i915_private *dev_priv,
  #define I915_USER_RINGS (4)
  
  static const enum intel_engine_id user_ring_map[I915_USER_RINGS + 1] = {
-       [I915_EXEC_DEFAULT]     = RCS,
-       [I915_EXEC_RENDER]      = RCS,
-       [I915_EXEC_BLT]         = BCS,
-       [I915_EXEC_BSD]         = VCS,
-       [I915_EXEC_VEBOX]       = VECS
+       [I915_EXEC_DEFAULT]     = RCS0,
+       [I915_EXEC_RENDER]      = RCS0,
+       [I915_EXEC_BLT]         = BCS0,
+       [I915_EXEC_BSD]         = VCS0,
+       [I915_EXEC_VEBOX]       = VECS0
  };
  
  static struct intel_engine_cs *
@@ -2035,7 +2109,7 @@ eb_select_engine(struct drm_i915_private *dev_priv,
                 return NULL;
         }
  
-       if (user_ring_id == I915_EXEC_BSD && HAS_BSD2(dev_priv)) {
+       if (user_ring_id == I915_EXEC_BSD && HAS_ENGINE(dev_priv, VCS1)) {
                 unsigned int bsd_idx = args->flags & I915_EXEC_BSD_MASK;
  
                 if (bsd_idx == I915_EXEC_BSD_DEFAULT) {
@@ -2238,10 +2312,6 @@ i915_gem_do_execbuffer(struct drm_device *dev,
         if (args->flags & I915_EXEC_IS_PINNED)
                 eb.batch_flags |= I915_DISPATCH_PINNED;
  
-       eb.engine = eb_select_engine(eb.i915, file, args);
-       if (!eb.engine)
-               return -EINVAL;
-
         if (args->flags & I915_EXEC_FENCE_IN) {
                 in_fence = sync_file_get_fence(lower_32_bits(args->rsvd2));
                 if (!in_fence)
@@ -2266,6 +2336,12 @@ i915_gem_do_execbuffer(struct drm_device *dev,
         if (unlikely(err))
                 goto err_destroy;
  
+       eb.engine = eb_select_engine(eb.i915, file, args);
+       if (!eb.engine) {
+               err = -EINVAL;
+               goto err_engine;
+       }
+
         /*
          * Take a local wakeref for preparing to dispatch the execbuf as
          * we expect to access the hardware fairly frequently in the
@@ -2279,6 +2355,10 @@ i915_gem_do_execbuffer(struct drm_device *dev,
         if (err)
                 goto err_rpm;
  
+       err = eb_wait_for_ring(&eb); /* may temporarily drop struct_mutex */
+       if (unlikely(err))
+               goto err_unlock;
+
         err = eb_relocate(&eb);
         if (err) {
                 /*
@@ -2423,9 +2503,11 @@ err_batch_unpin:
  err_vma:
         if (eb.exec)
                 eb_release_vmas(&eb);
+err_unlock:
         mutex_unlock(&dev->struct_mutex);
  err_rpm:
         intel_runtime_pm_put(eb.i915, wakeref);
+err_engine:
         i915_gem_context_put(eb.ctx);
  err_destroy:
         eb_destroy(&eb);