drm/i915: Stop caching the "golden" renderstate
author Chris Wilson <chris@chris-wilson.co.uk>
Fri, 10 Nov 2017 14:26:34 +0000 (14:26 +0000)
committer Chris Wilson <chris@chris-wilson.co.uk>
Fri, 10 Nov 2017 17:23:22 +0000 (17:23 +0000)
As we now record the default HW state and so only emit the "golden"
renderstate once to prepare the HW, there is no advantage in keeping the
renderstate batch around as it will never be used again.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20171110142634.10551-8-chris@chris-wilson.co.uk
drivers/gpu/drm/i915/i915_drv.h
drivers/gpu/drm/i915/i915_gem_render_state.c
drivers/gpu/drm/i915/i915_gem_render_state.h
drivers/gpu/drm/i915/intel_engine_cs.c
drivers/gpu/drm/i915/intel_lrc.c
drivers/gpu/drm/i915/intel_ringbuffer.c
drivers/gpu/drm/i915/intel_ringbuffer.h

index 0be9a918697a340ece9d9a6a69a6726c812ca9d8..40012b6daea27cb681bc7283d70ecf606519f343 100644 (file)
@@ -67,7 +67,6 @@
 #include "i915_gem_fence_reg.h"
 #include "i915_gem_object.h"
 #include "i915_gem_gtt.h"
-#include "i915_gem_render_state.h"
 #include "i915_gem_request.h"
 #include "i915_gem_timeline.h"
 
index 3703dc91eedab90e1241bc8e37550d1ac95d2470..c2723a06fbb4e2eec80a5e137ac03590860c4f40 100644 (file)
  */
 
 #include "i915_drv.h"
+#include "i915_gem_render_state.h"
 #include "intel_renderstate.h"
 
 struct intel_render_state {
        const struct intel_renderstate_rodata *rodata;
+       struct drm_i915_gem_object *obj;
        struct i915_vma *vma;
        u32 batch_offset;
        u32 batch_size;
@@ -40,6 +42,9 @@ struct intel_render_state {
 static const struct intel_renderstate_rodata *
 render_state_get_rodata(const struct intel_engine_cs *engine)
 {
+       if (engine->id != RCS)
+               return NULL;
+
        switch (INTEL_GEN(engine->i915)) {
        case 6:
                return &gen6_null_state;
@@ -74,17 +79,16 @@ static int render_state_setup(struct intel_render_state *so,
                              struct drm_i915_private *i915)
 {
        const struct intel_renderstate_rodata *rodata = so->rodata;
-       struct drm_i915_gem_object *obj = so->vma->obj;
        unsigned int i = 0, reloc_index = 0;
        unsigned int needs_clflush;
        u32 *d;
        int ret;
 
-       ret = i915_gem_obj_prepare_shmem_write(obj, &needs_clflush);
+       ret = i915_gem_obj_prepare_shmem_write(so->obj, &needs_clflush);
        if (ret)
                return ret;
 
-       d = kmap_atomic(i915_gem_object_get_dirty_page(obj, 0));
+       d = kmap_atomic(i915_gem_object_get_dirty_page(so->obj, 0));
 
        while (i < rodata->batch_items) {
                u32 s = rodata->batch[i];
@@ -112,7 +116,7 @@ static int render_state_setup(struct intel_render_state *so,
                goto err;
        }
 
-       so->batch_offset = so->vma->node.start;
+       so->batch_offset = i915_ggtt_offset(so->vma);
        so->batch_size = rodata->batch_items * sizeof(u32);
 
        while (i % CACHELINE_DWORDS)
@@ -160,9 +164,9 @@ static int render_state_setup(struct intel_render_state *so,
                drm_clflush_virt_range(d, i * sizeof(u32));
        kunmap_atomic(d);
 
-       ret = i915_gem_object_set_to_gtt_domain(obj, false);
+       ret = i915_gem_object_set_to_gtt_domain(so->obj, false);
 out:
-       i915_gem_obj_finish_shmem_access(obj);
+       i915_gem_obj_finish_shmem_access(so->obj);
        return ret;
 
 err:
@@ -173,112 +177,61 @@ err:
 
 #undef OUT_BATCH
 
-int i915_gem_render_state_init(struct intel_engine_cs *engine)
+int i915_gem_render_state_emit(struct drm_i915_gem_request *rq)
 {
-       struct intel_render_state *so;
-       const struct intel_renderstate_rodata *rodata;
-       struct drm_i915_gem_object *obj;
-       int ret;
+       struct intel_engine_cs *engine = rq->engine;
+       struct intel_render_state so = {}; /* keep the compiler happy */
+       int err;
 
-       if (engine->id != RCS)
+       so.rodata = render_state_get_rodata(engine);
+       if (!so.rodata)
                return 0;
 
-       rodata = render_state_get_rodata(engine);
-       if (!rodata)
-               return 0;
-
-       if (rodata->batch_items * 4 > PAGE_SIZE)
+       if (so.rodata->batch_items * 4 > PAGE_SIZE)
                return -EINVAL;
 
-       so = kmalloc(sizeof(*so), GFP_KERNEL);
-       if (!so)
-               return -ENOMEM;
-
-       obj = i915_gem_object_create_internal(engine->i915, PAGE_SIZE);
-       if (IS_ERR(obj)) {
-               ret = PTR_ERR(obj);
-               goto err_free;
-       }
+       so.obj = i915_gem_object_create_internal(engine->i915, PAGE_SIZE);
+       if (IS_ERR(so.obj))
+               return PTR_ERR(so.obj);
 
-       so->vma = i915_vma_instance(obj, &engine->i915->ggtt.base, NULL);
-       if (IS_ERR(so->vma)) {
-               ret = PTR_ERR(so->vma);
+       so.vma = i915_vma_instance(so.obj, &engine->i915->ggtt.base, NULL);
+       if (IS_ERR(so.vma)) {
+               err = PTR_ERR(so.vma);
                goto err_obj;
        }
 
-       so->rodata = rodata;
-       engine->render_state = so;
-       return 0;
-
-err_obj:
-       i915_gem_object_put(obj);
-err_free:
-       kfree(so);
-       return ret;
-}
-
-int i915_gem_render_state_emit(struct drm_i915_gem_request *req)
-{
-       struct intel_render_state *so;
-       int ret;
-
-       lockdep_assert_held(&req->i915->drm.struct_mutex);
-
-       so = req->engine->render_state;
-       if (!so)
-               return 0;
-
-       /* Recreate the page after shrinking */
-       if (!i915_gem_object_has_pages(so->vma->obj))
-               so->batch_offset = -1;
-
-       ret = i915_vma_pin(so->vma, 0, 0, PIN_GLOBAL | PIN_HIGH);
-       if (ret)
-               return ret;
+       err = i915_vma_pin(so.vma, 0, 0, PIN_GLOBAL | PIN_HIGH);
+       if (err)
+               goto err_vma;
 
-       if (so->vma->node.start != so->batch_offset) {
-               ret = render_state_setup(so, req->i915);
-               if (ret)
-                       goto err_unpin;
-       }
+       err = render_state_setup(&so, rq->i915);
+       if (err)
+               goto err_unpin;
 
-       ret = req->engine->emit_flush(req, EMIT_INVALIDATE);
-       if (ret)
+       err = engine->emit_flush(rq, EMIT_INVALIDATE);
+       if (err)
                goto err_unpin;
 
-       ret = req->engine->emit_bb_start(req,
-                                        so->batch_offset, so->batch_size,
-                                        I915_DISPATCH_SECURE);
-       if (ret)
+       err = engine->emit_bb_start(rq,
+                                   so.batch_offset, so.batch_size,
+                                   I915_DISPATCH_SECURE);
+       if (err)
                goto err_unpin;
 
-       if (so->aux_size > 8) {
-               ret = req->engine->emit_bb_start(req,
-                                                so->aux_offset, so->aux_size,
-                                                I915_DISPATCH_SECURE);
-               if (ret)
+       if (so.aux_size > 8) {
+               err = engine->emit_bb_start(rq,
+                                           so.aux_offset, so.aux_size,
+                                           I915_DISPATCH_SECURE);
+               if (err)
                        goto err_unpin;
        }
 
-       i915_vma_move_to_active(so->vma, req, 0);
+       i915_vma_move_to_active(so.vma, rq, 0);
 err_unpin:
-       i915_vma_unpin(so->vma);
-       return ret;
-}
-
-void i915_gem_render_state_fini(struct intel_engine_cs *engine)
-{
-       struct intel_render_state *so;
-       struct drm_i915_gem_object *obj;
-
-       so = fetch_and_zero(&engine->render_state);
-       if (!so)
-               return;
-
-       obj = so->vma->obj;
-
-       i915_vma_close(so->vma);
-       __i915_gem_object_release_unless_active(obj);
-
-       kfree(so);
+       i915_vma_unpin(so.vma);
+err_vma:
+       i915_vma_close(so.vma);
+err_obj:
+       __i915_gem_object_release_unless_active(so.obj);
+       return err;
 }
index 87481845799d39e9990e09bd07e319a91543a97e..86369520482e4c6c8da55d7974b4823522e27797 100644 (file)
@@ -26,8 +26,6 @@
 
 struct drm_i915_gem_request;
 
-int i915_gem_render_state_init(struct intel_engine_cs *engine);
-int i915_gem_render_state_emit(struct drm_i915_gem_request *req);
-void i915_gem_render_state_fini(struct intel_engine_cs *engine);
+int i915_gem_render_state_emit(struct drm_i915_gem_request *rq);
 
 #endif /* _I915_GEM_RENDER_STATE_H_ */
index 868c07a693b55c33b929070f6925b65eaca0f933..a0f9d0eb4bce97a1c25bb00ca134153ff72892a3 100644 (file)
@@ -641,21 +641,15 @@ int intel_engine_init_common(struct intel_engine_cs *engine)
        if (ret)
                goto err_unpin_preempt;
 
-       ret = i915_gem_render_state_init(engine);
-       if (ret)
-               goto err_breadcrumbs;
-
        if (HWS_NEEDS_PHYSICAL(engine->i915))
                ret = init_phys_status_page(engine);
        else
                ret = init_status_page(engine);
        if (ret)
-               goto err_rs_fini;
+               goto err_breadcrumbs;
 
        return 0;
 
-err_rs_fini:
-       i915_gem_render_state_fini(engine);
 err_breadcrumbs:
        intel_engine_fini_breadcrumbs(engine);
 err_unpin_preempt:
@@ -682,7 +676,6 @@ void intel_engine_cleanup_common(struct intel_engine_cs *engine)
        else
                cleanup_status_page(engine);
 
-       i915_gem_render_state_fini(engine);
        intel_engine_fini_breadcrumbs(engine);
        intel_engine_cleanup_cmd_parser(engine);
        i915_gem_batch_pool_fini(&engine->batch_pool);
index 0c93f27f36eef49669cdf420894040dd1f84108f..58d050a9a8665cc06d803f413e2794d63b09d731 100644 (file)
 #include <drm/drmP.h>
 #include <drm/i915_drm.h>
 #include "i915_drv.h"
+#include "i915_gem_render_state.h"
 #include "intel_mocs.h"
 
 #define RING_EXECLIST_QFULL            (1 << 0x2)
index 464dc58af27b85c82d937fe07aced2c68a84c5b8..3321b801e77d7f2cfd6664972429a70e99925dfc 100644 (file)
  */
 
 #include <linux/log2.h>
+
 #include <drm/drmP.h>
-#include "i915_drv.h"
 #include <drm/i915_drm.h>
+
+#include "i915_drv.h"
+#include "i915_gem_render_state.h"
 #include "i915_trace.h"
 #include "intel_drv.h"
 
index 337222859166980bd1fd45bcf8e580ce4964c308..ef22c994038bb6d3db4e31226ced916a11b97baa 100644 (file)
@@ -165,7 +165,6 @@ struct i915_ctx_workarounds {
 };
 
 struct drm_i915_gem_request;
-struct intel_render_state;
 
 /*
  * Engine IDs definitions.
@@ -307,7 +306,6 @@ struct intel_engine_cs {
        struct intel_timeline *timeline;
 
        struct drm_i915_gem_object *default_state;
-       struct intel_render_state *render_state;
 
        atomic_t irq_count;
        unsigned long irq_posted;