drm/i915/lrc: allocate separate page for HWSP
authorDaniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
Wed, 13 Sep 2017 08:56:02 +0000 (09:56 +0100)
committerChris Wilson <chris@chris-wilson.co.uk>
Wed, 13 Sep 2017 14:02:39 +0000 (15:02 +0100)
On gen8+ we're currently using the PPHWSP of the kernel ctx as the
global HWSP. However, when the kernel ctx gets submitted (e.g. from
__intel_autoenable_gt_powersave) the HW will use that page as both
HWSP and PPHWSP. This causes a conflict in the register arena of the
HWSP, i.e. dword indices below 0x30. We don't current utilize this arena,
but in the following patches we will take advantage of the cached
register state for handling execlist's context status interrupt.

To avoid the conflict, instead of re-using the PPHWSP of the kernel
ctx we can allocate a separate page for the HWSP like what happens for
pre-execlists platform.

v2: Add a use-case for the register arena of the HWSP.

Signed-off-by: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
Cc: Michel Thierry <michel.thierry@intel.com>
Link: http://patchwork.freedesktop.org/patch/msgid/1499357440-34688-1-git-send-email-daniele.ceraolospurio@intel.com
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Michel Thierry <michel.thierry@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20170913085605.18299-3-chris@chris-wilson.co.uk
drivers/gpu/drm/i915/intel_engine_cs.c
drivers/gpu/drm/i915/intel_lrc.c
drivers/gpu/drm/i915/intel_ringbuffer.c

index 3ae89a9d62419c85e490dddde7d3b993af12496f..8a5535ad6552cf072cd9cdb4d13f5b0eb02a3b41 100644 (file)
@@ -442,6 +442,114 @@ static void intel_engine_cleanup_scratch(struct intel_engine_cs *engine)
        i915_vma_unpin_and_release(&engine->scratch);
 }
 
+static void cleanup_phys_status_page(struct intel_engine_cs *engine)
+{
+       struct drm_i915_private *dev_priv = engine->i915;
+
+       if (!dev_priv->status_page_dmah)
+               return;
+
+       drm_pci_free(&dev_priv->drm, dev_priv->status_page_dmah);
+       engine->status_page.page_addr = NULL;
+}
+
+static void cleanup_status_page(struct intel_engine_cs *engine)
+{
+       struct i915_vma *vma;
+       struct drm_i915_gem_object *obj;
+
+       vma = fetch_and_zero(&engine->status_page.vma);
+       if (!vma)
+               return;
+
+       obj = vma->obj;
+
+       i915_vma_unpin(vma);
+       i915_vma_close(vma);
+
+       i915_gem_object_unpin_map(obj);
+       __i915_gem_object_release_unless_active(obj);
+}
+
+static int init_status_page(struct intel_engine_cs *engine)
+{
+       struct drm_i915_gem_object *obj;
+       struct i915_vma *vma;
+       unsigned int flags;
+       void *vaddr;
+       int ret;
+
+       obj = i915_gem_object_create_internal(engine->i915, PAGE_SIZE);
+       if (IS_ERR(obj)) {
+               DRM_ERROR("Failed to allocate status page\n");
+               return PTR_ERR(obj);
+       }
+
+       ret = i915_gem_object_set_cache_level(obj, I915_CACHE_LLC);
+       if (ret)
+               goto err;
+
+       vma = i915_vma_instance(obj, &engine->i915->ggtt.base, NULL);
+       if (IS_ERR(vma)) {
+               ret = PTR_ERR(vma);
+               goto err;
+       }
+
+       flags = PIN_GLOBAL;
+       if (!HAS_LLC(engine->i915))
+               /* On g33, we cannot place HWS above 256MiB, so
+                * restrict its pinning to the low mappable arena.
+                * Though this restriction is not documented for
+                * gen4, gen5, or byt, they also behave similarly
+                * and hang if the HWS is placed at the top of the
+                * GTT. To generalise, it appears that all !llc
+                * platforms have issues with us placing the HWS
+                * above the mappable region (even though we never
+                * actually map it).
+                */
+               flags |= PIN_MAPPABLE;
+       ret = i915_vma_pin(vma, 0, 4096, flags);
+       if (ret)
+               goto err;
+
+       vaddr = i915_gem_object_pin_map(obj, I915_MAP_WB);
+       if (IS_ERR(vaddr)) {
+               ret = PTR_ERR(vaddr);
+               goto err_unpin;
+       }
+
+       engine->status_page.vma = vma;
+       engine->status_page.ggtt_offset = i915_ggtt_offset(vma);
+       engine->status_page.page_addr = memset(vaddr, 0, PAGE_SIZE);
+
+       DRM_DEBUG_DRIVER("%s hws offset: 0x%08x\n",
+                        engine->name, i915_ggtt_offset(vma));
+       return 0;
+
+err_unpin:
+       i915_vma_unpin(vma);
+err:
+       i915_gem_object_put(obj);
+       return ret;
+}
+
+static int init_phys_status_page(struct intel_engine_cs *engine)
+{
+       struct drm_i915_private *dev_priv = engine->i915;
+
+       GEM_BUG_ON(engine->id != RCS);
+
+       dev_priv->status_page_dmah =
+               drm_pci_alloc(&dev_priv->drm, PAGE_SIZE, PAGE_SIZE);
+       if (!dev_priv->status_page_dmah)
+               return -ENOMEM;
+
+       engine->status_page.page_addr = dev_priv->status_page_dmah->vaddr;
+       memset(engine->status_page.page_addr, 0, PAGE_SIZE);
+
+       return 0;
+}
+
 /**
  * intel_engines_init_common - initialize cengine state which might require hw access
  * @engine: Engine to initialize.
@@ -477,10 +585,21 @@ int intel_engine_init_common(struct intel_engine_cs *engine)
 
        ret = i915_gem_render_state_init(engine);
        if (ret)
-               goto err_unpin;
+               goto err_breadcrumbs;
+
+       if (HWS_NEEDS_PHYSICAL(engine->i915))
+               ret = init_phys_status_page(engine);
+       else
+               ret = init_status_page(engine);
+       if (ret)
+               goto err_rs_fini;
 
        return 0;
 
+err_rs_fini:
+       i915_gem_render_state_fini(engine);
+err_breadcrumbs:
+       intel_engine_fini_breadcrumbs(engine);
 err_unpin:
        engine->context_unpin(engine, engine->i915->kernel_context);
        return ret;
@@ -497,6 +616,11 @@ void intel_engine_cleanup_common(struct intel_engine_cs *engine)
 {
        intel_engine_cleanup_scratch(engine);
 
+       if (HWS_NEEDS_PHYSICAL(engine->i915))
+               cleanup_phys_status_page(engine);
+       else
+               cleanup_status_page(engine);
+
        i915_gem_render_state_fini(engine);
        intel_engine_fini_breadcrumbs(engine);
        intel_engine_cleanup_cmd_parser(engine);
index 3b8f1a79d640ba2e974e3a9019a298449c0ab7ce..8886e3b60e820ac52581af65011264b9bdb8be1f 100644 (file)
@@ -1674,11 +1674,6 @@ void intel_logical_ring_cleanup(struct intel_engine_cs *engine)
        if (engine->cleanup)
                engine->cleanup(engine);
 
-       if (engine->status_page.vma) {
-               i915_gem_object_unpin_map(engine->status_page.vma->obj);
-               engine->status_page.vma = NULL;
-       }
-
        intel_engine_cleanup_common(engine);
 
        lrc_destroy_wa_ctx(engine);
@@ -1725,24 +1720,6 @@ logical_ring_default_irqs(struct intel_engine_cs *engine)
        engine->irq_keep_mask = GT_CONTEXT_SWITCH_INTERRUPT << shift;
 }
 
-static int
-lrc_setup_hws(struct intel_engine_cs *engine, struct i915_vma *vma)
-{
-       const int hws_offset = LRC_PPHWSP_PN * PAGE_SIZE;
-       void *hws;
-
-       /* The HWSP is part of the default context object in LRC mode. */
-       hws = i915_gem_object_pin_map(vma->obj, I915_MAP_WB);
-       if (IS_ERR(hws))
-               return PTR_ERR(hws);
-
-       engine->status_page.page_addr = hws + hws_offset;
-       engine->status_page.ggtt_offset = i915_ggtt_offset(vma) + hws_offset;
-       engine->status_page.vma = vma;
-
-       return 0;
-}
-
 static void
 logical_ring_setup(struct intel_engine_cs *engine)
 {
@@ -1775,23 +1752,14 @@ logical_ring_setup(struct intel_engine_cs *engine)
        logical_ring_default_irqs(engine);
 }
 
-static int
-logical_ring_init(struct intel_engine_cs *engine)
+static int logical_ring_init(struct intel_engine_cs *engine)
 {
-       struct i915_gem_context *dctx = engine->i915->kernel_context;
        int ret;
 
        ret = intel_engine_init_common(engine);
        if (ret)
                goto error;
 
-       /* And setup the hardware status page. */
-       ret = lrc_setup_hws(engine, dctx->engine[engine->id].state);
-       if (ret) {
-               DRM_ERROR("Failed to set up hws %s: %d\n", engine->name, ret);
-               goto error;
-       }
-
        return 0;
 
 error:
index 268342433a8e55b48e80d8eaf13d34f29e62391c..8af8871a8594f067d8798917f6ec932f0c5f66d2 100644 (file)
@@ -1175,113 +1175,7 @@ i915_emit_bb_start(struct drm_i915_gem_request *req,
        return 0;
 }
 
-static void cleanup_phys_status_page(struct intel_engine_cs *engine)
-{
-       struct drm_i915_private *dev_priv = engine->i915;
-
-       if (!dev_priv->status_page_dmah)
-               return;
-
-       drm_pci_free(&dev_priv->drm, dev_priv->status_page_dmah);
-       engine->status_page.page_addr = NULL;
-}
-
-static void cleanup_status_page(struct intel_engine_cs *engine)
-{
-       struct i915_vma *vma;
-       struct drm_i915_gem_object *obj;
-
-       vma = fetch_and_zero(&engine->status_page.vma);
-       if (!vma)
-               return;
-
-       obj = vma->obj;
-
-       i915_vma_unpin(vma);
-       i915_vma_close(vma);
-
-       i915_gem_object_unpin_map(obj);
-       __i915_gem_object_release_unless_active(obj);
-}
 
-static int init_status_page(struct intel_engine_cs *engine)
-{
-       struct drm_i915_gem_object *obj;
-       struct i915_vma *vma;
-       unsigned int flags;
-       void *vaddr;
-       int ret;
-
-       obj = i915_gem_object_create_internal(engine->i915, PAGE_SIZE);
-       if (IS_ERR(obj)) {
-               DRM_ERROR("Failed to allocate status page\n");
-               return PTR_ERR(obj);
-       }
-
-       ret = i915_gem_object_set_cache_level(obj, I915_CACHE_LLC);
-       if (ret)
-               goto err;
-
-       vma = i915_vma_instance(obj, &engine->i915->ggtt.base, NULL);
-       if (IS_ERR(vma)) {
-               ret = PTR_ERR(vma);
-               goto err;
-       }
-
-       flags = PIN_GLOBAL;
-       if (!HAS_LLC(engine->i915))
-               /* On g33, we cannot place HWS above 256MiB, so
-                * restrict its pinning to the low mappable arena.
-                * Though this restriction is not documented for
-                * gen4, gen5, or byt, they also behave similarly
-                * and hang if the HWS is placed at the top of the
-                * GTT. To generalise, it appears that all !llc
-                * platforms have issues with us placing the HWS
-                * above the mappable region (even though we never
-                * actualy map it).
-                */
-               flags |= PIN_MAPPABLE;
-       ret = i915_vma_pin(vma, 0, 4096, flags);
-       if (ret)
-               goto err;
-
-       vaddr = i915_gem_object_pin_map(obj, I915_MAP_WB);
-       if (IS_ERR(vaddr)) {
-               ret = PTR_ERR(vaddr);
-               goto err_unpin;
-       }
-
-       engine->status_page.vma = vma;
-       engine->status_page.ggtt_offset = i915_ggtt_offset(vma);
-       engine->status_page.page_addr = memset(vaddr, 0, PAGE_SIZE);
-
-       DRM_DEBUG_DRIVER("%s hws offset: 0x%08x\n",
-                        engine->name, i915_ggtt_offset(vma));
-       return 0;
-
-err_unpin:
-       i915_vma_unpin(vma);
-err:
-       i915_gem_object_put(obj);
-       return ret;
-}
-
-static int init_phys_status_page(struct intel_engine_cs *engine)
-{
-       struct drm_i915_private *dev_priv = engine->i915;
-
-       GEM_BUG_ON(engine->id != RCS);
-
-       dev_priv->status_page_dmah =
-               drm_pci_alloc(&dev_priv->drm, PAGE_SIZE, PAGE_SIZE);
-       if (!dev_priv->status_page_dmah)
-               return -ENOMEM;
-
-       engine->status_page.page_addr = dev_priv->status_page_dmah->vaddr;
-       memset(engine->status_page.page_addr, 0, PAGE_SIZE);
-
-       return 0;
-}
 
 int intel_ring_pin(struct intel_ring *ring,
                   struct drm_i915_private *i915,
@@ -1568,17 +1462,10 @@ static int intel_init_ring_buffer(struct intel_engine_cs *engine)
        if (err)
                goto err;
 
-       if (HWS_NEEDS_PHYSICAL(engine->i915))
-               err = init_phys_status_page(engine);
-       else
-               err = init_status_page(engine);
-       if (err)
-               goto err;
-
        ring = intel_engine_create_ring(engine, 32 * PAGE_SIZE);
        if (IS_ERR(ring)) {
                err = PTR_ERR(ring);
-               goto err_hws;
+               goto err;
        }
 
        /* Ring wraparound at offset 0 sometimes hangs. No idea why. */
@@ -1593,11 +1480,6 @@ static int intel_init_ring_buffer(struct intel_engine_cs *engine)
 
 err_ring:
        intel_ring_free(ring);
-err_hws:
-       if (HWS_NEEDS_PHYSICAL(engine->i915))
-               cleanup_phys_status_page(engine);
-       else
-               cleanup_status_page(engine);
 err:
        intel_engine_cleanup_common(engine);
        return err;
@@ -1616,11 +1498,6 @@ void intel_engine_cleanup(struct intel_engine_cs *engine)
        if (engine->cleanup)
                engine->cleanup(engine);
 
-       if (HWS_NEEDS_PHYSICAL(dev_priv))
-               cleanup_phys_status_page(engine);
-       else
-               cleanup_status_page(engine);
-
        intel_engine_cleanup_common(engine);
 
        dev_priv->engine[engine->id] = NULL;