drm/i915/gem: Use chained reloc batches

author Chris Wilson <chris@chris-wilson.co.uk>

Fri, 1 May 2020 19:29:43 +0000 (20:29 +0100)

committer Chris Wilson <chris@chris-wilson.co.uk>

Fri, 1 May 2020 21:56:15 +0000 (22:56 +0100)
author Chris Wilson <chris@chris-wilson.co.uk>
Fri, 1 May 2020 19:29:43 +0000 (20:29 +0100)
committer Chris Wilson <chris@chris-wilson.co.uk>
Fri, 1 May 2020 21:56:15 +0000 (22:56 +0100)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c

index 414859fa2673b894afa8505cbd8dc4eb5e025b9f..0411618d66a9883780c0468b8eb2762cb520d3a0 100644 (file)
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -269,6 +269,7 @@ struct i915_execbuffer {
                 bool needs_unfenced : 1;
  
                 struct i915_request *rq;
+               struct i915_vma *rq_vma;
                 u32 *rq_cmd;
                 unsigned int rq_size;
         } reloc_cache;
@@ -975,20 +976,114 @@ static inline struct i915_ggtt *cache_to_ggtt(struct reloc_cache *cache)
         return &i915->ggtt;
  }
  
+#define RELOC_TAIL 4
+
+static int reloc_gpu_chain(struct reloc_cache *cache)
+{
+       struct intel_gt_buffer_pool_node *pool;
+       struct i915_request *rq = cache->rq;
+       struct i915_vma *batch;
+       u32 *cmd;
+       int err;
+
+       pool = intel_gt_get_buffer_pool(rq->engine->gt, PAGE_SIZE);
+       if (IS_ERR(pool))
+               return PTR_ERR(pool);
+
+       batch = i915_vma_instance(pool->obj, rq->context->vm, NULL);
+       if (IS_ERR(batch)) {
+               err = PTR_ERR(batch);
+               goto out_pool;
+       }
+
+       err = i915_vma_pin(batch, 0, 0, PIN_USER | PIN_NONBLOCK);
+       if (err)
+               goto out_pool;
+
+       GEM_BUG_ON(cache->rq_size + RELOC_TAIL > PAGE_SIZE  / sizeof(u32));
+       cmd = cache->rq_cmd + cache->rq_size;
+       *cmd++ = MI_ARB_CHECK;
+       if (cache->gen >= 8) {
+               *cmd++ = MI_BATCH_BUFFER_START_GEN8;
+               *cmd++ = lower_32_bits(batch->node.start);
+               *cmd++ = upper_32_bits(batch->node.start);
+       } else {
+               *cmd++ = MI_BATCH_BUFFER_START;
+               *cmd++ = lower_32_bits(batch->node.start);
+       }
+       i915_gem_object_flush_map(cache->rq_vma->obj);
+       i915_gem_object_unpin_map(cache->rq_vma->obj);
+       cache->rq_vma = NULL;
+
+       err = intel_gt_buffer_pool_mark_active(pool, rq);
+       if (err == 0) {
+               i915_vma_lock(batch);
+               err = i915_request_await_object(rq, batch->obj, false);
+               if (err == 0)
+                       err = i915_vma_move_to_active(batch, rq, 0);
+               i915_vma_unlock(batch);
+       }
+       i915_vma_unpin(batch);
+       if (err)
+               goto out_pool;
+
+       cmd = i915_gem_object_pin_map(batch->obj,
+                                     cache->has_llc ?
+                                     I915_MAP_FORCE_WB :
+                                     I915_MAP_FORCE_WC);
+       if (IS_ERR(cmd)) {
+               err = PTR_ERR(cmd);
+               goto out_pool;
+       }
+
+       /* Return with batch mapping (cmd) still pinned */
+       cache->rq_cmd = cmd;
+       cache->rq_size = 0;
+       cache->rq_vma = batch;
+
+out_pool:
+       intel_gt_buffer_pool_put(pool);
+       return err;
+}
+
+static unsigned int reloc_bb_flags(const struct reloc_cache *cache)
+{
+       return cache->gen > 5 ? 0 : I915_DISPATCH_SECURE;
+}
+
  static void reloc_gpu_flush(struct reloc_cache *cache)
  {
-       struct drm_i915_gem_object *obj = cache->rq->batch->obj;
+       struct i915_request *rq;
+       int err;
  
-       GEM_BUG_ON(cache->rq_size >= obj->base.size / sizeof(u32));
-       cache->rq_cmd[cache->rq_size] = MI_BATCH_BUFFER_END;
+       rq = fetch_and_zero(&cache->rq);
+       if (!rq)
+               return;
  
-       __i915_gem_object_flush_map(obj, 0, sizeof(u32) * (cache->rq_size + 1));
-       i915_gem_object_unpin_map(obj);
+       if (cache->rq_vma) {
+               struct drm_i915_gem_object *obj = cache->rq_vma->obj;
  
-       intel_gt_chipset_flush(cache->rq->engine->gt);
+               GEM_BUG_ON(cache->rq_size >= obj->base.size / sizeof(u32));
+               cache->rq_cmd[cache->rq_size++] = MI_BATCH_BUFFER_END;
  
-       i915_request_add(cache->rq);
-       cache->rq = NULL;
+               __i915_gem_object_flush_map(obj,
+                                           0, sizeof(u32) * cache->rq_size);
+               i915_gem_object_unpin_map(obj);
+       }
+
+       err = 0;
+       if (rq->engine->emit_init_breadcrumb)
+               err = rq->engine->emit_init_breadcrumb(rq);
+       if (!err)
+               err = rq->engine->emit_bb_start(rq,
+                                               rq->batch->node.start,
+                                               PAGE_SIZE,
+                                               reloc_bb_flags(cache));
+       if (err)
+               i915_request_set_error_once(rq, err);
+
+       intel_gt_chipset_flush(rq->engine->gt);
+       i915_request_add(rq);
  }
  
  static void reloc_cache_reset(struct reloc_cache *cache)
@@ -1237,12 +1332,6 @@ static int __reloc_gpu_alloc(struct i915_execbuffer *eb,
         if (err)
                 goto err_request;
  
-       err = eb->engine->emit_bb_start(rq,
-                                       batch->node.start, PAGE_SIZE,
-                                       cache->gen > 5 ? 0 : I915_DISPATCH_SECURE);
-       if (err)
-               goto skip_request;
-
         i915_vma_lock(batch);
         err = i915_request_await_object(rq, batch->obj, false);
         if (err == 0)
@@ -1257,6 +1346,7 @@ static int __reloc_gpu_alloc(struct i915_execbuffer *eb,
         cache->rq = rq;
         cache->rq_cmd = cmd;
         cache->rq_size = 0;
+       cache->rq_vma = batch;
  
         /* Return with batch mapping (cmd) still pinned */
         goto out_pool;
@@ -1280,13 +1370,9 @@ static u32 *reloc_gpu(struct i915_execbuffer *eb,
  {
         struct reloc_cache *cache = &eb->reloc_cache;
         u32 *cmd;
-
-       if (cache->rq_size > PAGE_SIZE/sizeof(u32) - (len + 1))
-               reloc_gpu_flush(cache);
+       int err;
  
         if (unlikely(!cache->rq)) {
-               int err;
-
                 if (!intel_engine_can_store_dword(eb->engine))
                         return ERR_PTR(-ENODEV);
  
@@ -1295,6 +1381,16 @@ static u32 *reloc_gpu(struct i915_execbuffer *eb,
                         return ERR_PTR(err);
         }
  
+       if (unlikely(cache->rq_size + len >
+                    PAGE_SIZE / sizeof(u32) - RELOC_TAIL)) {
+               err = reloc_gpu_chain(cache);
+               if (unlikely(err)) {
+                       i915_request_set_error_once(cache->rq, err);
+                       return ERR_PTR(err);
+               }
+       }
+
+       GEM_BUG_ON(cache->rq_size + len >= PAGE_SIZE  / sizeof(u32));
         cmd = cache->rq_cmd + cache->rq_size;
         cache->rq_size += len;
author	Chris Wilson <chris@chris-wilson.co.uk>
	Fri, 1 May 2020 19:29:43 +0000 (20:29 +0100)
committer	Chris Wilson <chris@chris-wilson.co.uk>
	Fri, 1 May 2020 21:56:15 +0000 (22:56 +0100)