Merge tag 'drm-misc-next-2019-01-07-1' of git://anongit.freedesktop.org/drm/drm-misc...

[linux-2.6-block.git] / drivers / gpu / drm / i915 / intel_lrc.c
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c

index 9399db3260ad9eb74f3379291464fbe0906482b4..d84c7815ee0ca4e400e3342fdc7d7635a08798d2 100644 (file)
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -398,8 +398,13 @@ static u64 execlists_update_context(struct i915_request *rq)
          * may not be visible to the HW prior to the completion of the UC
          * register write and that we may begin execution from the context
          * before its image is complete leading to invalid PD chasing.
+        *
+        * Furthermore, Braswell, at least, wants a full mb to be sure that
+        * the writes are coherent in memory (visible to the GPU) prior to
+        * execution, and not just visible to other CPUs (as is the result of
+        * wmb).
          */
-       wmb();
+       mb();
         return ce->lrc_desc;
  }
  
@@ -767,6 +772,8 @@ execlists_cancel_port_requests(struct intel_engine_execlists * const execlists)
  
  static void reset_csb_pointers(struct intel_engine_execlists *execlists)
  {
+       const unsigned int reset_value = GEN8_CSB_ENTRIES - 1;
+
         /*
          * After a reset, the HW starts writing into CSB entry [0]. We
          * therefore have to set our HEAD pointer back one entry so that
@@ -776,8 +783,8 @@ static void reset_csb_pointers(struct intel_engine_execlists *execlists)
          * inline comparison of our cached head position against the last HW
          * write works even before the first interrupt.
          */
-       execlists->csb_head = execlists->csb_write_reset;
-       WRITE_ONCE(*execlists->csb_write, execlists->csb_write_reset);
+       execlists->csb_head = reset_value;
+       WRITE_ONCE(*execlists->csb_write, reset_value);
  }
  
  static void nop_submission_tasklet(unsigned long data)
@@ -818,8 +825,11 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine)
         /* Mark all executing requests as skipped. */
         list_for_each_entry(rq, &engine->timeline.requests, link) {
                 GEM_BUG_ON(!rq->global_seqno);
-               if (!i915_request_completed(rq))
-                       dma_fence_set_error(&rq->fence, -EIO);
+
+               if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &rq->fence.flags))
+                       continue;
+
+               dma_fence_set_error(&rq->fence, -EIO);
         }
  
         /* Flush the queued requests to the timeline list (for retiring). */
@@ -839,6 +849,10 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine)
                         kmem_cache_free(engine->i915->priorities, p);
         }
  
+       intel_write_status_page(engine,
+                               I915_GEM_HWS_INDEX,
+                               intel_engine_last_submit(engine));
+
         /* Remaining _unready_ requests will be nop'ed when submitted */
  
         execlists->queue_priority = INT_MIN;
@@ -1279,9 +1293,10 @@ static int execlists_request_alloc(struct i915_request *request)
  static u32 *
  gen8_emit_flush_coherentl3_wa(struct intel_engine_cs *engine, u32 *batch)
  {
+       /* NB no one else is allowed to scribble over scratch + 256! */
         *batch++ = MI_STORE_REGISTER_MEM_GEN8 | MI_SRM_LRM_GLOBAL_GTT;
         *batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4);
-       *batch++ = i915_ggtt_offset(engine->scratch) + 256;
+       *batch++ = i915_scratch_offset(engine->i915) + 256;
         *batch++ = 0;
  
         *batch++ = MI_LOAD_REGISTER_IMM(1);
@@ -1295,7 +1310,7 @@ gen8_emit_flush_coherentl3_wa(struct intel_engine_cs *engine, u32 *batch)
  
         *batch++ = MI_LOAD_REGISTER_MEM_GEN8 | MI_SRM_LRM_GLOBAL_GTT;
         *batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4);
-       *batch++ = i915_ggtt_offset(engine->scratch) + 256;
+       *batch++ = i915_scratch_offset(engine->i915) + 256;
         *batch++ = 0;
  
         return batch;
@@ -1332,7 +1347,7 @@ static u32 *gen8_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch)
                                        PIPE_CONTROL_GLOBAL_GTT_IVB |
                                        PIPE_CONTROL_CS_STALL |
                                        PIPE_CONTROL_QW_WRITE,
-                                      i915_ggtt_offset(engine->scratch) +
+                                      i915_scratch_offset(engine->i915) +
                                        2 * CACHELINE_BYTES);
  
         *batch++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
@@ -1401,18 +1416,6 @@ static u32 *gen9_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch)
  
         batch = emit_lri(batch, lri, ARRAY_SIZE(lri));
  
-       /* WaClearSlmSpaceAtContextSwitch:kbl */
-       /* Actual scratch location is at 128 bytes offset */
-       if (IS_KBL_REVID(engine->i915, 0, KBL_REVID_A0)) {
-               batch = gen8_emit_pipe_control(batch,
-                                              PIPE_CONTROL_FLUSH_L3 |
-                                              PIPE_CONTROL_GLOBAL_GTT_IVB |
-                                              PIPE_CONTROL_CS_STALL |
-                                              PIPE_CONTROL_QW_WRITE,
-                                              i915_ggtt_offset(engine->scratch)
-                                              + 2 * CACHELINE_BYTES);
-       }
-
         /* WaMediaPoolStateCmdInWABB:bxt,glk */
         if (HAS_POOLED_EU(engine->i915)) {
                 /*
@@ -1629,6 +1632,8 @@ static bool unexpected_starting_state(struct intel_engine_cs *engine)
  
  static int gen8_init_common_ring(struct intel_engine_cs *engine)
  {
+       intel_engine_apply_workarounds(engine);
+
         intel_mocs_init_engine(engine);
  
         intel_engine_reset_breadcrumbs(engine);
@@ -1653,7 +1658,7 @@ static int gen8_init_render_ring(struct intel_engine_cs *engine)
         if (ret)
                 return ret;
  
-       intel_whitelist_workarounds_apply(engine);
+       intel_engine_apply_whitelist(engine);
  
         /* We need to disable the AsyncFlip performance optimisations in order
          * to use MI_WAIT_FOR_EVENT within the CS. It should already be
@@ -1676,7 +1681,7 @@ static int gen9_init_render_ring(struct intel_engine_cs *engine)
         if (ret)
                 return ret;
  
-       intel_whitelist_workarounds_apply(engine);
+       intel_engine_apply_whitelist(engine);
  
         return 0;
  }
@@ -1974,7 +1979,7 @@ static int gen8_emit_flush_render(struct i915_request *request,
  {
         struct intel_engine_cs *engine = request->engine;
         u32 scratch_addr =
-               i915_ggtt_offset(engine->scratch) + 2 * CACHELINE_BYTES;
+               i915_scratch_offset(engine->i915) + 2 * CACHELINE_BYTES;
         bool vf_flush_wa = false, dc_flush_wa = false;
         u32 *cs, flags = 0;
         int len;
@@ -2088,7 +2093,7 @@ static int gen8_init_rcs_context(struct i915_request *rq)
  {
         int ret;
  
-       ret = intel_ctx_workarounds_emit(rq);
+       ret = intel_engine_emit_ctx_wa(rq);
         if (ret)
                 return ret;
  
@@ -2229,12 +2234,6 @@ logical_ring_setup(struct intel_engine_cs *engine)
         logical_ring_default_irqs(engine);
  }
  
-static bool csb_force_mmio(struct drm_i915_private *i915)
-{
-       /* Older GVT emulation depends upon intercepting CSB mmio */
-       return intel_vgpu_active(i915) && !intel_vgpu_has_hwsp_emulation(i915);
-}
-
  static int logical_ring_init(struct intel_engine_cs *engine)
  {
         struct drm_i915_private *i915 = engine->i915;
@@ -2264,24 +2263,12 @@ static int logical_ring_init(struct intel_engine_cs *engine)
                         upper_32_bits(ce->lrc_desc);
         }
  
-       execlists->csb_read =
-               i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine));
-       if (csb_force_mmio(i915)) {
-               execlists->csb_status = (u32 __force *)
-                       (i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_BUF_LO(engine, 0)));
+       execlists->csb_status =
+               &engine->status_page.page_addr[I915_HWS_CSB_BUF0_INDEX];
  
-               execlists->csb_write = (u32 __force *)execlists->csb_read;
-               execlists->csb_write_reset =
-                       _MASKED_FIELD(GEN8_CSB_WRITE_PTR_MASK,
-                                     GEN8_CSB_ENTRIES - 1);
-       } else {
-               execlists->csb_status =
-                       &engine->status_page.page_addr[I915_HWS_CSB_BUF0_INDEX];
+       execlists->csb_write =
+               &engine->status_page.page_addr[intel_hws_csb_write_index(i915)];
  
-               execlists->csb_write =
-                       &engine->status_page.page_addr[intel_hws_csb_write_index(i915)];
-               execlists->csb_write_reset = GEN8_CSB_ENTRIES - 1;
-       }
         reset_csb_pointers(execlists);
  
         return 0;
@@ -2311,10 +2298,6 @@ int logical_render_ring_init(struct intel_engine_cs *engine)
         if (ret)
                 return ret;
  
-       ret = intel_engine_create_scratch(engine, PAGE_SIZE);
-       if (ret)
-               goto err_cleanup_common;
-
         ret = intel_init_workaround_bb(engine);
         if (ret) {
                 /*
@@ -2326,11 +2309,10 @@ int logical_render_ring_init(struct intel_engine_cs *engine)
                           ret);
         }
  
-       return 0;
+       intel_engine_init_whitelist(engine);
+       intel_engine_init_workarounds(engine);
  
-err_cleanup_common:
-       intel_engine_cleanup_common(engine);
-       return ret;
+       return 0;
  }
  
  int logical_xcs_ring_init(struct intel_engine_cs *engine)