Merge tag 'drm-intel-next-2022-06-22' of git://anongit.freedesktop.org/drm/drm-intel...
drivers/gpu/drm/i915/gt/intel_execlists_submission.c
index 1c3fc4e5c3ca61d470364134dd576ad18a62e665..aa0d2bbbbcc41e3011a8baabf1098f00b912a136 100644
@@ -625,8 +625,6 @@ static void __execlists_schedule_out(struct i915_request * const rq,
                GEM_BUG_ON(test_bit(ccid - 1, &engine->context_tag));
                __set_bit(ccid - 1, &engine->context_tag);
        }
-
-       lrc_update_runtime(ce);
        intel_engine_context_out(engine);
        execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_OUT);
        if (engine->fw_domain && !--engine->fw_active)
@@ -1651,12 +1649,6 @@ cancel_port_requests(struct intel_engine_execlists * const execlists,
        return inactive;
 }
 
-static void invalidate_csb_entries(const u64 *first, const u64 *last)
-{
-       clflush((void *)first);
-       clflush((void *)last);
-}
-
 /*
  * Starting with Gen12, the status has a new format:
  *
@@ -2004,15 +1996,30 @@ process_csb(struct intel_engine_cs *engine, struct i915_request **inactive)
         * the wash as hardware, working or not, will need to do the
         * invalidation before.
         */
-       invalidate_csb_entries(&buf[0], &buf[num_entries - 1]);
+       drm_clflush_virt_range(&buf[0], num_entries * sizeof(buf[0]));
 
        /*
         * We assume that any event reflects a change in context flow
         * and merits a fresh timeslice. We reinstall the timer after
         * inspecting the queue to see if we need to resubmit.
         */
-       if (*prev != *execlists->active) /* elide lite-restores */
+       if (*prev != *execlists->active) { /* elide lite-restores */
+               /*
+                * Note the inherent discrepancy between the HW runtime,
+                * recorded as part of the context switch, and the CPU
+                * adjustment for active contexts. We have to hope that
+                * the delay in processing the CS event is very small
+                * and consistent. It works to our advantage to have
+                * the CPU adjustment _undershoot_ (i.e. start later than)
+                * the CS timestamp so we never overreport the runtime
+                * and correct ourselves later when updating from HW.
+                */
+               if (*prev)
+                       lrc_runtime_stop((*prev)->context);
+               if (*execlists->active)
+                       lrc_runtime_start((*execlists->active)->context);
                new_timeslice(execlists);
+       }
 
        return inactive;
 }
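
The lrc_runtime_stop()/lrc_runtime_start() calls added above keep a CPU-side estimate
of how long the incoming context has been running, bridging the gap until the next
HW-recorded context switch. A self-contained sketch of that bookkeeping (illustrative
names only, not the i915 helpers):

	/*
	 * Model of the start/stop accounting wired into process_csb() above;
	 * ctx_stats and clock_now() are illustrative, not the i915 code.
	 */
	#include <stdint.h>
	#include <time.h>

	struct ctx_stats {
		uint64_t runtime_ns;	/* accumulated runtime */
		uint64_t active_since;	/* 0 while not running on HW */
	};

	static uint64_t clock_now(void)
	{
		struct timespec ts;

		clock_gettime(CLOCK_MONOTONIC, &ts);
		return (uint64_t)ts.tv_sec * 1000000000ull + ts.tv_nsec;
	}

	static void runtime_start(struct ctx_stats *s)
	{
		/* Starting late (after the CS event) undershoots, never overreports. */
		if (!s->active_since)
			s->active_since = clock_now();
	}

	static void runtime_stop(struct ctx_stats *s)
	{
		if (s->active_since) {
			s->runtime_ns += clock_now() - s->active_since;
			s->active_since = 0;
		}
	}
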
@@ -2236,11 +2243,11 @@ static struct execlists_capture *capture_regs(struct intel_engine_cs *engine)
        if (!cap->error)
                goto err_cap;
 
-       cap->error->gt = intel_gt_coredump_alloc(engine->gt, gfp);
+       cap->error->gt = intel_gt_coredump_alloc(engine->gt, gfp, CORE_DUMP_FLAG_NONE);
        if (!cap->error->gt)
                goto err_gpu;
 
-       cap->error->gt->engine = intel_engine_coredump_alloc(engine, gfp);
+       cap->error->gt->engine = intel_engine_coredump_alloc(engine, gfp, CORE_DUMP_FLAG_NONE);
        if (!cap->error->gt->engine)
                goto err_gt;
 
@@ -2644,7 +2651,7 @@ unwind:
 }
 
 static const struct intel_context_ops execlists_context_ops = {
-       .flags = COPS_HAS_INFLIGHT,
+       .flags = COPS_HAS_INFLIGHT | COPS_RUNTIME_CYCLES,
 
        .alloc = execlists_context_alloc,
 
@@ -2788,8 +2795,9 @@ static void reset_csb_pointers(struct intel_engine_cs *engine)
 
        /* Check that the GPU does indeed update the CSB entries! */
        memset(execlists->csb_status, -1, (reset_value + 1) * sizeof(u64));
-       invalidate_csb_entries(&execlists->csb_status[0],
-                              &execlists->csb_status[reset_value]);
+       drm_clflush_virt_range(execlists->csb_status,
+                              execlists->csb_size *
+                              sizeof(execlists->csb_status));
 
        /* Once more for luck and our trusty paranoia */
        ENGINE_WRITE(engine, RING_CONTEXT_STATUS_PTR,
@@ -2833,7 +2841,7 @@ static void execlists_sanitize(struct intel_engine_cs *engine)
        sanitize_hwsp(engine);
 
        /* And scrub the dirty cachelines for the HWSP */
-       clflush_cache_range(engine->status_page.addr, PAGE_SIZE);
+       drm_clflush_virt_range(engine->status_page.addr, PAGE_SIZE);
 
        intel_engine_reset_pinned_contexts(engine);
 }
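
The drm_clflush_virt_range() conversions in this and the neighbouring hunks flush the
whole CPU-visible range, rather than only the cachelines holding the first and last
entries as the removed invalidate_csb_entries() did. A rough user-space model of that
behaviour on x86 (not the DRM helper itself):

	#include <stdint.h>

	#define CACHELINE_BYTES 64

	static void clflush_virt_range_model(void *addr, unsigned long length)
	{
	#if defined(__x86_64__) || defined(__i386__)
		uintptr_t p = (uintptr_t)addr & ~(uintptr_t)(CACHELINE_BYTES - 1);
		uintptr_t end = (uintptr_t)addr + length;

		/* Flush every cacheline overlapping [addr, addr + length). */
		for (; p < end; p += CACHELINE_BYTES)
			__asm__ volatile("clflush %0" : "+m" (*(volatile char *)p));
	#else
		(void)addr;
		(void)length;	/* other architectures need their own cache op */
	#endif
	}
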
@@ -2912,7 +2920,7 @@ static int execlists_resume(struct intel_engine_cs *engine)
 
        enable_execlists(engine);
 
-       if (engine->class == RENDER_CLASS)
+       if (engine->flags & I915_ENGINE_FIRST_RENDER_COMPUTE)
                xehp_enable_ccs_engines(engine);
 
        return 0;
@@ -2958,9 +2966,8 @@ reset_csb(struct intel_engine_cs *engine, struct i915_request **inactive)
 {
        struct intel_engine_execlists * const execlists = &engine->execlists;
 
-       mb(); /* paranoia: read the CSB pointers from after the reset */
-       clflush(execlists->csb_write);
-       mb();
+       drm_clflush_virt_range(execlists->csb_write,
+                              sizeof(execlists->csb_write[0]));
 
        inactive = process_csb(engine, inactive); /* drain preemption events */
 
@@ -3426,10 +3433,17 @@ logical_ring_default_vfuncs(struct intel_engine_cs *engine)
                }
        }
 
-       if (intel_engine_has_preemption(engine))
-               engine->emit_bb_start = gen8_emit_bb_start;
-       else
-               engine->emit_bb_start = gen8_emit_bb_start_noarb;
+       if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 50)) {
+               if (intel_engine_has_preemption(engine))
+                       engine->emit_bb_start = gen125_emit_bb_start;
+               else
+                       engine->emit_bb_start = gen125_emit_bb_start_noarb;
+       } else {
+               if (intel_engine_has_preemption(engine))
+                       engine->emit_bb_start = gen8_emit_bb_start;
+               else
+                       engine->emit_bb_start = gen8_emit_bb_start_noarb;
+       }
 
        engine->busyness = execlists_engine_busyness;
 }
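
The new GRAPHICS_VER_FULL() branch selects the 12.50+ (Xe_HP) batch-buffer-start
emitters. A small sketch of how that comparison behaves, assuming IP_VER() packs the
full version as (ver << 8 | rel) the way i915 does; the example version values are
illustrative:

	#include <stdio.h>

	#define IP_VER(ver, rel)	((ver) << 8 | (rel))

	int main(void)
	{
		unsigned int vers[] = { IP_VER(12, 0), IP_VER(12, 50), IP_VER(12, 55) };
		unsigned int i;

		for (i = 0; i < sizeof(vers) / sizeof(vers[0]); i++)
			printf("0x%04x -> %s\n", vers[i],
			       vers[i] >= IP_VER(12, 50) ?
			       "gen125_emit_bb_start*" : "gen8_emit_bb_start*");
		return 0;
	}
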
@@ -3702,7 +3716,7 @@ virtual_get_sibling(struct intel_engine_cs *engine, unsigned int sibling)
 }
 
 static const struct intel_context_ops virtual_context_ops = {
-       .flags = COPS_HAS_INFLIGHT,
+       .flags = COPS_HAS_INFLIGHT | COPS_RUNTIME_CYCLES,
 
        .alloc = virtual_context_alloc,
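
Both execlists context_ops now advertise COPS_RUNTIME_CYCLES, i.e. the runtime
accumulated from the context image is in GT clock cycles rather than nanoseconds. A
hedged sketch of how a readout path could combine that with the CPU-side active
estimate (illustrative names and types, not the i915 functions):

	#include <stdint.h>
	#include <stdbool.h>

	/*
	 * Model only: total_cycles stands in for the HW-accumulated runtime,
	 * active_since_ns for the CPU-side start/stop tracking shown earlier.
	 */
	struct runtime_model {
		uint64_t total_cycles;		/* HW-accumulated, in GT clock cycles */
		uint64_t active_since_ns;	/* 0 when not currently running */
		uint32_t clock_period_ns;	/* GT clock period */
		bool runtime_in_cycles;		/* the COPS_RUNTIME_CYCLES case */
	};

	static uint64_t total_runtime_ns(const struct runtime_model *m, uint64_t now_ns)
	{
		uint64_t total = m->total_cycles;

		if (m->runtime_in_cycles)
			total *= m->clock_period_ns;	/* convert cycles to ns */

		/* Add the not-yet-saved slice of the currently running context. */
		if (m->active_since_ns)
			total += now_ns - m->active_since_ns;

		return total;
	}
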