drm/i915/gvt: init mmio by lri command in vgpu inhibit context
authorWeinan Li <weinan.z.li@intel.com>
Fri, 23 Feb 2018 06:46:45 +0000 (14:46 +0800)
committerZhenyu Wang <zhenyuw@linux.intel.com>
Tue, 6 Mar 2018 05:19:23 +0000 (13:19 +0800)
There is an issue related to Coarse Power Gating (CPG) on KBL NUC in GVT-g:
a vgpu can't get the correct default context by updating the registers before
the inhibit context is submitted. It always gets back the hardware default
value unless the inhibit context submission happens before the first
forcewake put. With this wrong default context, the vgpu will run with an
incorrect state and hit unknown issues.

The solution is to initialize these mmios by adding LRI commands to the ring
buffer of the inhibit context, so the GPU hardware has no chance to drop into
RC6 while the LRI commands are being executed; the vgpu can then get a correct
default context for further use.
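
For illustration only (not part of the patch): a minimal, self-contained
sketch of how such an LRI sequence is laid out in the ring buffer. It assumes
i915's MI_INSTR()/MI_LOAD_REGISTER_IMM() encodings; the register offsets and
values below are made up.

	#include <stdint.h>
	#include <stdio.h>

	#define MI_INSTR(opcode, flags)	(((opcode) << 23) | (flags))
	#define MI_LOAD_REGISTER_IMM(x)	MI_INSTR(0x22, 2 * (x) - 1)
	#define MI_NOOP			MI_INSTR(0, 0)

	int main(void)
	{
		/* made-up offset/value pairs standing in for tracked engine mmios */
		uint32_t regs[][2] = {
			{ 0x7014, 0xffff0001 },	/* value | (mask << 16), as the patch emits */
			{ 0x7300, 0xffff0000 },
		};
		int count = sizeof(regs) / sizeof(regs[0]);
		uint32_t cs[2 + 2 * 2];
		int i, n = 0;

		cs[n++] = MI_LOAD_REGISTER_IMM(count);	/* header: 'count' offset/value pairs follow */
		for (i = 0; i < count; i++) {
			cs[n++] = regs[i][0];		/* register offset */
			cs[n++] = regs[i][1];		/* immediate value to load */
		}
		cs[n++] = MI_NOOP;			/* pad to an even dword count */

		for (i = 0; i < n; i++)
			printf("dword[%d] = 0x%08x\n", i, cs[i]);
		return 0;
	}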

v3:
- fix code fault, use 'for' to loop through the mmio render list (Zhenyu)

v4:
- save the count of engine mmios that need to be restored for the inhibit
  context and refine some comments. (Kevin)

v5:
- code rebase

Cc: Kevin Tian <kevin.tian@intel.com>
Cc: Zhenyu Wang <zhenyuw@linux.intel.com>
Signed-off-by: Weinan Li <weinan.z.li@intel.com>
Signed-off-by: Zhenyu Wang <zhenyuw@linux.intel.com>
drivers/gpu/drm/i915/gvt/gvt.h
drivers/gpu/drm/i915/gvt/mmio_context.c
drivers/gpu/drm/i915/gvt/mmio_context.h
drivers/gpu/drm/i915/gvt/scheduler.c

index 2b28b523376d6a3de30cde037e5aef0ce299b4ea..9131638e3999bd65870c4afaa5844b2361803cb9 100644 (file)
@@ -310,7 +310,10 @@ struct intel_gvt {
        wait_queue_head_t service_thread_wq;
        unsigned long service_request;
 
-       struct engine_mmio *engine_mmio_list;
+       struct {
+               struct engine_mmio *mmio;
+               int ctx_mmio_count[I915_NUM_ENGINES];
+       } engine_mmio_list;
 
        struct dentry *debugfs_root;
 };
index 1bc1b28eb9e124d9f76ceb63c417a515916bc9c4..74a9c7b5516e728bfc6dd7c05f428ca184cf8093 100644 (file)
@@ -187,6 +187,153 @@ static void load_render_mocs(struct drm_i915_private *dev_priv)
        gen9_render_mocs.initialized = true;
 }
 
+static int
+restore_context_mmio_for_inhibit(struct intel_vgpu *vgpu,
+                                struct i915_request *req)
+{
+       u32 *cs;
+       int ret;
+       struct engine_mmio *mmio;
+       struct intel_gvt *gvt = vgpu->gvt;
+       int ring_id = req->engine->id;
+       int count = gvt->engine_mmio_list.ctx_mmio_count[ring_id];
+
+       if (count == 0)
+               return 0;
+
+       ret = req->engine->emit_flush(req, EMIT_BARRIER);
+       if (ret)
+               return ret;
+
+       cs = intel_ring_begin(req, count * 2 + 2);
+       if (IS_ERR(cs))
+               return PTR_ERR(cs);
+
+       *cs++ = MI_LOAD_REGISTER_IMM(count);
+       for (mmio = gvt->engine_mmio_list.mmio;
+            i915_mmio_reg_valid(mmio->reg); mmio++) {
+               if (mmio->ring_id != ring_id ||
+                   !mmio->in_context)
+                       continue;
+
+               *cs++ = i915_mmio_reg_offset(mmio->reg);
+               *cs++ = vgpu_vreg_t(vgpu, mmio->reg) |
+                               (mmio->mask << 16);
+               gvt_dbg_core("add lri reg pair 0x%x:0x%x in inhibit ctx, vgpu:%d, ring_id:%d\n",
+                             *(cs-2), *(cs-1), vgpu->id, ring_id);
+       }
+
+       *cs++ = MI_NOOP;
+       intel_ring_advance(req, cs);
+
+       ret = req->engine->emit_flush(req, EMIT_BARRIER);
+       if (ret)
+               return ret;
+
+       return 0;
+}
+
+static int
+restore_render_mocs_control_for_inhibit(struct intel_vgpu *vgpu,
+                                       struct i915_request *req)
+{
+       unsigned int index;
+       u32 *cs;
+
+       cs = intel_ring_begin(req, 2 * GEN9_MOCS_SIZE + 2);
+       if (IS_ERR(cs))
+               return PTR_ERR(cs);
+
+       *cs++ = MI_LOAD_REGISTER_IMM(GEN9_MOCS_SIZE);
+
+       for (index = 0; index < GEN9_MOCS_SIZE; index++) {
+               *cs++ = i915_mmio_reg_offset(GEN9_GFX_MOCS(index));
+               *cs++ = vgpu_vreg_t(vgpu, GEN9_GFX_MOCS(index));
+               gvt_dbg_core("add lri reg pair 0x%x:0x%x in inhibit ctx, vgpu:%d, ring_id:%d\n",
+                             *(cs-2), *(cs-1), vgpu->id, req->engine->id);
+
+       }
+
+       *cs++ = MI_NOOP;
+       intel_ring_advance(req, cs);
+
+       return 0;
+}
+
+static int
+restore_render_mocs_l3cc_for_inhibit(struct intel_vgpu *vgpu,
+                                    struct i915_request *req)
+{
+       unsigned int index;
+       u32 *cs;
+
+       cs = intel_ring_begin(req, 2 * GEN9_MOCS_SIZE / 2 + 2);
+       if (IS_ERR(cs))
+               return PTR_ERR(cs);
+
+       *cs++ = MI_LOAD_REGISTER_IMM(GEN9_MOCS_SIZE / 2);
+
+       for (index = 0; index < GEN9_MOCS_SIZE / 2; index++) {
+               *cs++ = i915_mmio_reg_offset(GEN9_LNCFCMOCS(index));
+               *cs++ = vgpu_vreg_t(vgpu, GEN9_LNCFCMOCS(index));
+               gvt_dbg_core("add lri reg pair 0x%x:0x%x in inhibit ctx, vgpu:%d, ring_id:%d\n",
+                             *(cs-2), *(cs-1), vgpu->id, req->engine->id);
+
+       }
+
+       *cs++ = MI_NOOP;
+       intel_ring_advance(req, cs);
+
+       return 0;
+}
+
+/*
+ * Use LRI commands to initialize the mmios that are in the context state
+ * image for the inhibit context; this covers the tracked engine mmios,
+ * render_mocs and render_mocs_l3cc.
+ */
+int intel_vgpu_restore_inhibit_context(struct intel_vgpu *vgpu,
+                                      struct i915_request *req)
+{
+       int ret;
+       u32 *cs;
+
+       cs = intel_ring_begin(req, 2);
+       if (IS_ERR(cs))
+               return PTR_ERR(cs);
+
+       *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
+       *cs++ = MI_NOOP;
+       intel_ring_advance(req, cs);
+
+       ret = restore_context_mmio_for_inhibit(vgpu, req);
+       if (ret)
+               goto out;
+
+       /* no MOCS registers in the context except on the render engine */
+       if (req->engine->id != RCS)
+               goto out;
+
+       ret = restore_render_mocs_control_for_inhibit(vgpu, req);
+       if (ret)
+               goto out;
+
+       ret = restore_render_mocs_l3cc_for_inhibit(vgpu, req);
+       if (ret)
+               goto out;
+
+out:
+       cs = intel_ring_begin(req, 2);
+       if (IS_ERR(cs))
+               return PTR_ERR(cs);
+
+       *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
+       *cs++ = MI_NOOP;
+       intel_ring_advance(req, cs);
+
+       return ret;
+}
+
 static void handle_tlb_pending_event(struct intel_vgpu *vgpu, int ring_id)
 {
        struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv;
@@ -253,6 +400,9 @@ static void switch_mocs(struct intel_vgpu *pre, struct intel_vgpu *next,
        if (WARN_ON(ring_id >= ARRAY_SIZE(regs)))
                return;
 
+       if (IS_KABYLAKE(dev_priv) && ring_id == RCS)
+               return;
+
        if (!pre && !gen9_render_mocs.initialized)
                load_render_mocs(dev_priv);
 
@@ -319,10 +469,18 @@ static void switch_mmio(struct intel_vgpu *pre,
        if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv))
                switch_mocs(pre, next, ring_id);
 
-       for (mmio = dev_priv->gvt->engine_mmio_list;
+       for (mmio = dev_priv->gvt->engine_mmio_list.mmio;
             i915_mmio_reg_valid(mmio->reg); mmio++) {
                if (mmio->ring_id != ring_id)
                        continue;
+               /*
+                * No need to save or restore the mmios that are in the context
+                * state image on Kabylake; they are initialized by LRI commands
+                * and saved or restored together with the context.
+                */
+               if (IS_KABYLAKE(dev_priv) && mmio->in_context)
+                       continue;
+
                // save
                if (pre) {
                        vgpu_vreg_t(pre, mmio->reg) = I915_READ_FW(mmio->reg);
@@ -411,8 +569,16 @@ void intel_gvt_switch_mmio(struct intel_vgpu *pre,
  */
 void intel_gvt_init_engine_mmio_context(struct intel_gvt *gvt)
 {
+       struct engine_mmio *mmio;
+
        if (IS_SKYLAKE(gvt->dev_priv) || IS_KABYLAKE(gvt->dev_priv))
-               gvt->engine_mmio_list = gen9_engine_mmio_list;
+               gvt->engine_mmio_list.mmio = gen9_engine_mmio_list;
        else
-               gvt->engine_mmio_list = gen8_engine_mmio_list;
+               gvt->engine_mmio_list.mmio = gen8_engine_mmio_list;
+
+       for (mmio = gvt->engine_mmio_list.mmio;
+            i915_mmio_reg_valid(mmio->reg); mmio++) {
+               if (mmio->in_context)
+                       gvt->engine_mmio_list.ctx_mmio_count[mmio->ring_id]++;
+       }
 }
index 4df87c7314c906093a52fb069da0dc1740949c69..0439eb8057a8a51068263a4272043d4463777c97 100644 (file)
@@ -51,4 +51,7 @@ void intel_gvt_init_engine_mmio_context(struct intel_gvt *gvt);
 
 bool is_inhibit_context(struct i915_gem_context *ctx, int ring_id);
 
+int intel_vgpu_restore_inhibit_context(struct intel_vgpu *vgpu,
+                                      struct i915_request *req);
+
 #endif
index f4765ed4e92a463139a374f5db2ad8b1f03c1b46..9b92b4e25a200b5ce6692a48057f6ee38cd47d17 100644 (file)
@@ -225,6 +225,11 @@ static int copy_workload_to_ring_buffer(struct intel_vgpu_workload *workload)
        struct intel_vgpu *vgpu = workload->vgpu;
        void *shadow_ring_buffer_va;
        u32 *cs;
+       struct i915_request *req = workload->req;
+
+       if (IS_KABYLAKE(req->i915) &&
+           is_inhibit_context(req->ctx, req->engine->id))
+               intel_vgpu_restore_inhibit_context(vgpu, req);
 
        /* allocate shadow ring buffer */
        cs = intel_ring_begin(workload->req, workload->rb_len / sizeof(u32));