drm/i915/oa: Reconfigure contexts on the fly
author Chris Wilson <chris@chris-wilson.co.uk>
Tue, 16 Jul 2019 21:34:43 +0000 (22:34 +0100)
committer Chris Wilson <chris@chris-wilson.co.uk>
Wed, 17 Jul 2019 06:58:27 +0000 (07:58 +0100)
Avoid a global idle barrier by reconfiguring each context on the fly,
rewriting it with MI_STORE_DWORD from the kernel context.

v2: We only need to determine the desired register values once; they are
the same for all contexts.
v3: Don't remove the kernel context from the list of known GEM contexts;
the world is not ready for that yet.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20190716213443.9874-1-chris@chris-wilson.co.uk
drivers/gpu/drm/i915/gem/i915_gem_context.c
drivers/gpu/drm/i915/gt/intel_context.c
drivers/gpu/drm/i915/gt/intel_context.h
drivers/gpu/drm/i915/gt/intel_lrc.c
drivers/gpu/drm/i915/i915_perf.c

index c5f8bfa3f7b0f702510783b8a66baf9826e19193..ffb59d96d4d8bd74eb681f66ed0711b38e284d35 100644 (file)
@@ -1173,26 +1173,11 @@ gen8_modify_rpcs(struct intel_context *ce, struct intel_sseu sseu)
        if (IS_ERR(rq))
                return PTR_ERR(rq);
 
-       /* Queue this switch after all other activity by this context. */
-       ret = i915_active_request_set(&ce->ring->timeline->last_request, rq);
-       if (ret)
-               goto out_add;
-
-       /*
-        * Guarantee context image and the timeline remains pinned until the
-        * modifying request is retired by setting the ce activity tracker.
-        *
-        * But we only need to take one pin on the account of it. Or in other
-        * words transfer the pinned ce object to tracked active request.
-        */
-       GEM_BUG_ON(i915_active_is_idle(&ce->active));
-       ret = i915_active_ref(&ce->active, rq->fence.context, rq);
-       if (ret)
-               goto out_add;
-
-       ret = gen8_emit_rpcs_config(rq, ce, sseu);
+       /* Serialise with the remote context */
+       ret = intel_context_prepare_remote_request(ce, rq);
+       if (ret == 0)
+               ret = gen8_emit_rpcs_config(rq, ce, sseu);
 
-out_add:
        i915_request_add(rq);
        return ret;
 }
index 1110fc8f657ace233049746aeddccc5280b20713..b667e2b358041a032859b267f08bae3a01136d1d 100644 (file)
@@ -239,6 +239,31 @@ void intel_context_exit_engine(struct intel_context *ce)
        intel_engine_pm_put(ce->engine);
 }
 
+int intel_context_prepare_remote_request(struct intel_context *ce,
+                                        struct i915_request *rq)
+{
+       struct intel_timeline *tl = ce->ring->timeline;
+       int err;
+
+       /* Only suitable for use in remotely modifying this context */
+       GEM_BUG_ON(rq->hw_context == ce);
+
+       /* Queue this switch after all other activity by this context. */
+       err = i915_active_request_set(&tl->last_request, rq);
+       if (err)
+               return err;
+
+       /*
+        * Guarantee the context image and the timeline remain pinned until
+        * the modifying request is retired by setting the ce activity tracker.
+        *
+        * But we only need to take one pin on account of it. Or in other
+        * words, transfer the pinned ce object to the tracked active request.
+        */
+       GEM_BUG_ON(i915_active_is_idle(&ce->active));
+       return i915_active_ref(&ce->active, rq->fence.context, rq);
+}
+
 struct i915_request *intel_context_create_request(struct intel_context *ce)
 {
        struct i915_request *rq;
index 40cd8320fcc39b3310b943cc40a3f7d7315eeaf6..b41c610c2ce6a1479ae1f8f77e54a8128f869a95 100644 (file)
@@ -139,6 +139,9 @@ static inline void intel_context_timeline_unlock(struct intel_context *ce)
        mutex_unlock(&ce->ring->timeline->mutex);
 }
 
+int intel_context_prepare_remote_request(struct intel_context *ce,
+                                        struct i915_request *rq);
+
 struct i915_request *intel_context_create_request(struct intel_context *ce);
 
 #endif /* __INTEL_CONTEXT_H__ */
index a220575a69bc616b40db4141e097d2af8ae5437d..f35a57d6d34a59d4a1b779f56efe32dc9964fcd7 100644 (file)
@@ -1576,9 +1576,12 @@ __execlists_update_reg_state(struct intel_context *ce,
        regs[CTX_RING_TAIL + 1] = ring->tail;
 
        /* RPCS */
-       if (engine->class == RENDER_CLASS)
+       if (engine->class == RENDER_CLASS) {
                regs[CTX_R_PWR_CLK_STATE + 1] =
                        intel_sseu_make_rpcs(engine->i915, &ce->sseu);
+
+               i915_oa_init_reg_state(engine, ce, regs);
+       }
 }
 
 static int
@@ -3001,8 +3004,6 @@ static void execlists_init_reg_state(u32 *regs,
        if (rcs) {
                regs[CTX_LRI_HEADER_2] = MI_LOAD_REGISTER_IMM(1);
                CTX_REG(regs, CTX_R_PWR_CLK_STATE, GEN8_R_PWR_CLK_STATE, 0);
-
-               i915_oa_init_reg_state(engine, ce, regs);
        }
 
        regs[CTX_END] = MI_BATCH_BUFFER_END;
index 007826ded9b32fd3cb5ed84e4dc2eeadc1c8b5a3..ab82ccba896bd52cdd08cb00151a65435ab5cbfa 100644 (file)
@@ -1636,6 +1636,27 @@ static void hsw_disable_metric_set(struct drm_i915_private *dev_priv)
                                      ~GT_NOA_ENABLE));
 }
 
+static u32 oa_config_flex_reg(const struct i915_oa_config *oa_config,
+                             i915_reg_t reg)
+{
+       u32 mmio = i915_mmio_reg_offset(reg);
+       int i;
+
+       /*
+        * This arbitrary default will select the 'EU FPU0 Pipeline
+        * Active' event. In the future it's anticipated that there
+        * will be an explicit 'No Event' we can select, but not yet...
+        */
+       if (!oa_config)
+               return 0;
+
+       for (i = 0; i < oa_config->flex_regs_len; i++) {
+               if (i915_mmio_reg_offset(oa_config->flex_regs[i].addr) == mmio)
+                       return oa_config->flex_regs[i].value;
+       }
+
+       return 0;
+}
 /*
  * NB: It must always remain pointer safe to run this even if the OA unit
  * has been disabled.
@@ -1669,28 +1690,8 @@ gen8_update_reg_state_unlocked(struct intel_context *ce,
                GEN8_OA_COUNTER_RESUME);
 
        for (i = 0; i < ARRAY_SIZE(flex_regs); i++) {
-               u32 state_offset = ctx_flexeu0 + i * 2;
-               u32 mmio = i915_mmio_reg_offset(flex_regs[i]);
-
-               /*
-                * This arbitrary default will select the 'EU FPU0 Pipeline
-                * Active' event. In the future it's anticipated that there
-                * will be an explicit 'No Event' we can select, but not yet...
-                */
-               u32 value = 0;
-
-               if (oa_config) {
-                       u32 j;
-
-                       for (j = 0; j < oa_config->flex_regs_len; j++) {
-                               if (i915_mmio_reg_offset(oa_config->flex_regs[j].addr) == mmio) {
-                                       value = oa_config->flex_regs[j].value;
-                                       break;
-                               }
-                       }
-               }
-
-               CTX_REG(reg_state, state_offset, flex_regs[i], value);
+               CTX_REG(reg_state, ctx_flexeu0 + i * 2, flex_regs[i],
+                       oa_config_flex_reg(oa_config, flex_regs[i]));
        }
 
        CTX_REG(reg_state,
@@ -1698,6 +1699,99 @@ gen8_update_reg_state_unlocked(struct intel_context *ce,
                intel_sseu_make_rpcs(i915, &ce->sseu));
 }
 
+struct flex {
+       i915_reg_t reg;
+       u32 offset;
+       u32 value;
+};
+
+static int
+gen8_store_flex(struct i915_request *rq,
+               struct intel_context *ce,
+               const struct flex *flex, unsigned int count)
+{
+       u32 offset;
+       u32 *cs;
+
+       cs = intel_ring_begin(rq, 4 * count);
+       if (IS_ERR(cs))
+               return PTR_ERR(cs);
+
+       offset = i915_ggtt_offset(ce->state) + LRC_STATE_PN * PAGE_SIZE;
+       do {
+               *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
+               *cs++ = offset + (flex->offset + 1) * sizeof(u32);
+               *cs++ = 0;
+               *cs++ = flex->value;
+       } while (flex++, --count);
+
+       intel_ring_advance(rq, cs);
+
+       return 0;
+}
+
+static int
+gen8_load_flex(struct i915_request *rq,
+              struct intel_context *ce,
+              const struct flex *flex, unsigned int count)
+{
+       u32 *cs;
+
+       GEM_BUG_ON(!count || count > 63);
+
+       cs = intel_ring_begin(rq, 2 * count + 2);
+       if (IS_ERR(cs))
+               return PTR_ERR(cs);
+
+       *cs++ = MI_LOAD_REGISTER_IMM(count);
+       do {
+               *cs++ = i915_mmio_reg_offset(flex->reg);
+               *cs++ = flex->value;
+       } while (flex++, --count);
+       *cs++ = MI_NOOP;
+
+       intel_ring_advance(rq, cs);
+
+       return 0;
+}
+
+static int gen8_modify_context(struct intel_context *ce,
+                              const struct flex *flex, unsigned int count)
+{
+       struct i915_request *rq;
+       int err;
+
+       lockdep_assert_held(&ce->pin_mutex);
+
+       rq = i915_request_create(ce->engine->kernel_context);
+       if (IS_ERR(rq))
+               return PTR_ERR(rq);
+
+       /* Serialise with the remote context */
+       err = intel_context_prepare_remote_request(ce, rq);
+       if (err == 0)
+               err = gen8_store_flex(rq, ce, flex, count);
+
+       i915_request_add(rq);
+       return err;
+}
+
+static int gen8_modify_self(struct intel_context *ce,
+                           const struct flex *flex, unsigned int count)
+{
+       struct i915_request *rq;
+       int err;
+
+       rq = i915_request_create(ce);
+       if (IS_ERR(rq))
+               return PTR_ERR(rq);
+
+       err = gen8_load_flex(rq, ce, flex, count);
+
+       i915_request_add(rq);
+       return err;
+}
+
 /*
  * Manages updating the per-context aspects of the OA stream
  * configuration across all contexts.
@@ -1722,15 +1816,43 @@ gen8_update_reg_state_unlocked(struct intel_context *ce,
  *
  * Note: it's only the RCS/Render context that has any OA state.
  */
-static int gen8_configure_all_contexts(struct drm_i915_private *dev_priv,
+static int gen8_configure_all_contexts(struct drm_i915_private *i915,
                                       const struct i915_oa_config *oa_config)
 {
-       unsigned int map_type = i915_coherent_map_type(dev_priv);
+       /* The MMIO offsets for Flex EU registers aren't contiguous */
+       const u32 ctx_flexeu0 = i915->perf.oa.ctx_flexeu0_offset;
+#define ctx_flexeuN(N) (ctx_flexeu0 + 2 * (N))
+       struct flex regs[] = {
+               {
+                       GEN8_R_PWR_CLK_STATE,
+                       CTX_R_PWR_CLK_STATE,
+               },
+               {
+                       GEN8_OACTXCONTROL,
+                       i915->perf.oa.ctx_oactxctrl_offset,
+                       ((i915->perf.oa.period_exponent << GEN8_OA_TIMER_PERIOD_SHIFT) |
+                        (i915->perf.oa.periodic ? GEN8_OA_TIMER_ENABLE : 0) |
+                        GEN8_OA_COUNTER_RESUME)
+               },
+               { EU_PERF_CNTL0, ctx_flexeuN(0) },
+               { EU_PERF_CNTL1, ctx_flexeuN(1) },
+               { EU_PERF_CNTL2, ctx_flexeuN(2) },
+               { EU_PERF_CNTL3, ctx_flexeuN(3) },
+               { EU_PERF_CNTL4, ctx_flexeuN(4) },
+               { EU_PERF_CNTL5, ctx_flexeuN(5) },
+               { EU_PERF_CNTL6, ctx_flexeuN(6) },
+       };
+#undef ctx_flexeuN
+       struct intel_engine_cs *engine;
        struct i915_gem_context *ctx;
-       struct i915_request *rq;
-       int ret;
+       enum intel_engine_id id;
+       int err;
+       int i;
+
+       for (i = 2; i < ARRAY_SIZE(regs); i++)
+               regs[i].value = oa_config_flex_reg(oa_config, regs[i].reg);
 
-       lockdep_assert_held(&dev_priv->drm.struct_mutex);
+       lockdep_assert_held(&i915->drm.struct_mutex);
 
        /*
         * The OA register config is setup through the context image. This image
@@ -1742,58 +1864,63 @@ static int gen8_configure_all_contexts(struct drm_i915_private *dev_priv,
         * this might leave small interval of time where the OA unit is
         * configured at an invalid sampling period.
         *
-        * So far the best way to work around this issue seems to be draining
-        * the GPU from any submitted work.
+        * Note that since we emit all requests from a single ring, there
+        * is still an implicit global barrier here that may cause a high
+        * priority context to wait for an otherwise independent low priority
+        * context. Contexts idle at the time of reconfiguration are not
+        * trapped behind the barrier.
         */
-       ret = i915_gem_wait_for_idle(dev_priv,
-                                    I915_WAIT_LOCKED,
-                                    MAX_SCHEDULE_TIMEOUT);
-       if (ret)
-               return ret;
-
-       /* Update all contexts now that we've stalled the submission. */
-       list_for_each_entry(ctx, &dev_priv->contexts.list, link) {
+       list_for_each_entry(ctx, &i915->contexts.list, link) {
                struct i915_gem_engines_iter it;
                struct intel_context *ce;
 
+               if (ctx == i915->kernel_context)
+                       continue;
+
                for_each_gem_engine(ce,
                                    i915_gem_context_lock_engines(ctx),
                                    it) {
-                       u32 *regs;
+                       GEM_BUG_ON(ce == ce->engine->kernel_context);
 
                        if (ce->engine->class != RENDER_CLASS)
                                continue;
 
-                       /* OA settings will be set upon first use */
-                       if (!ce->state)
-                               continue;
-
-                       regs = i915_gem_object_pin_map(ce->state->obj,
-                                                      map_type);
-                       if (IS_ERR(regs)) {
-                               i915_gem_context_unlock_engines(ctx);
-                               return PTR_ERR(regs);
-                       }
+                       err = intel_context_lock_pinned(ce);
+                       if (err)
+                               break;
 
-                       ce->state->obj->mm.dirty = true;
-                       regs += LRC_STATE_PN * PAGE_SIZE / sizeof(*regs);
+                       regs[0].value = intel_sseu_make_rpcs(i915, &ce->sseu);
 
-                       gen8_update_reg_state_unlocked(ce, regs, oa_config);
+                       /* Otherwise OA settings will be set upon first use */
+                       if (intel_context_is_pinned(ce))
+                               err = gen8_modify_context(ce, regs, ARRAY_SIZE(regs));
 
-                       i915_gem_object_unpin_map(ce->state->obj);
+                       intel_context_unlock_pinned(ce);
+                       if (err)
+                               break;
                }
                i915_gem_context_unlock_engines(ctx);
+               if (err)
+                       return err;
        }
 
        /*
-        * Apply the configuration by doing one context restore of the edited
-        * context image.
+        * After updating all other contexts, we need to modify ourselves.
+        * If we don't modify the kernel_context, we do not get events while
+        * idle.
         */
-       rq = i915_request_create(dev_priv->engine[RCS0]->kernel_context);
-       if (IS_ERR(rq))
-               return PTR_ERR(rq);
+       for_each_engine(engine, i915, id) {
+               struct intel_context *ce = engine->kernel_context;
 
-       i915_request_add(rq);
+               if (engine->class != RENDER_CLASS)
+                       continue;
+
+               regs[0].value = intel_sseu_make_rpcs(i915, &ce->sseu);
+
+               err = gen8_modify_self(ce, regs, ARRAY_SIZE(regs));
+               if (err)
+                       return err;
+       }
 
        return 0;
 }