drm/xe: Emit a render cache flush after each rcs/ccs batch
author Thomas Hellström <thomas.hellstrom@linux.intel.com>
Fri, 2 Jun 2023 12:44:23 +0000 (14:44 +0200)
committer Rodrigo Vivi <rodrigo.vivi@intel.com>
Tue, 19 Dec 2023 23:35:21 +0000 (18:35 -0500)
We need to flush render caches before fence signalling, where we might
release the memory for reuse. We can't rely on userspace doing this,
so flush render caches after the batch, but before user fence- and
dma_fence signalling.

Copy the cache flush from i915, but omit PIPE_CONTROL_FLUSH_L3, since it
should be implied by the other flushes. Also omit
PIPE_CONTROL_TLB_INVALIDATE since there should be no apparent need to
invalidate TLB after batch completion.

v2:
- Update Makefile for OOB WA.

Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Tested-by: José Roberto de Souza <jose.souza@intel.com>
Reviewed-by: José Roberto de Souza <jose.souza@intel.com> #1
Reported-by: José Roberto de Souza <jose.souza@intel.com>
Link: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/291
Closes: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/291
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
drivers/gpu/drm/xe/Makefile
drivers/gpu/drm/xe/regs/xe_gpu_commands.h
drivers/gpu/drm/xe/xe_ring_ops.c
drivers/gpu/drm/xe/xe_wa_oob.rules

index c914d02d8a8c33b20c22a71cc23b8e4a49f983cc..73100c246a7402a34cf0c52ade46c7053ac90685 100644 (file)
@@ -40,7 +40,7 @@ quiet_cmd_wa_oob = GEN     $(notdir $(generated_oob))
 $(generated_oob) &: $(obj)/xe_gen_wa_oob $(srctree)/$(src)/xe_wa_oob.rules
        $(call cmd,wa_oob)
 
-$(obj)/xe_guc.o $(obj)/xe_wa.o: $(generated_oob)
+$(obj)/xe_guc.o $(obj)/xe_wa.o $(obj)/xe_ring_ops.o: $(generated_oob)
 
 # Please keep these build lists sorted!
 
index 1a744c508174e186dff0d99772e3f460e377f341..12120dd37aa2a493c99b4ac1a0ff7b46d360c709 100644 (file)
@@ -66,6 +66,9 @@
 #define   PVC_MS_MOCS_INDEX_MASK       GENMASK(6, 1)
 
 #define GFX_OP_PIPE_CONTROL(len)       ((0x3<<29)|(0x3<<27)|(0x2<<24)|((len)-2))
+
+#define          PIPE_CONTROL0_HDC_PIPELINE_FLUSH              BIT(9)  /* gen12 */
+
 #define   PIPE_CONTROL_COMMAND_CACHE_INVALIDATE                (1<<29)
 #define   PIPE_CONTROL_TILE_CACHE_FLUSH                        (1<<28)
 #define   PIPE_CONTROL_AMFS_FLUSH                      (1<<25)
index 215606b5fae0edefea0a03085081983f89df0ef3..4cfd78e1ffa5838eccafc60ab8a174a98e20db91 100644 (file)
@@ -5,6 +5,7 @@
 
 #include "xe_ring_ops.h"
 
+#include "generated/xe_wa_oob.h"
 #include "regs/xe_gpu_commands.h"
 #include "regs/xe_gt_regs.h"
 #include "regs/xe_lrc_layout.h"
@@ -16,6 +17,7 @@
 #include "xe_sched_job.h"
 #include "xe_vm_types.h"
 #include "xe_vm.h"
+#include "xe_wa.h"
 
 /*
  * 3D-related flags that can't be set on _engines_ that lack access to the 3D
@@ -152,6 +154,37 @@ static int emit_store_imm_ppgtt_posted(u64 addr, u64 value,
        return i;
 }
 
+static int emit_render_cache_flush(struct xe_sched_job *job, u32 *dw, int i)
+{ /* Writes one 6-dword PIPE_CONTROL into dw[] starting at index i; returns the next index. */
+       struct xe_gt *gt = job->engine->gt;
+       bool lacks_render = !(gt->info.engine_mask & XE_HW_ENGINE_RCS_MASK); /* true when the GT's engine mask has no RCS bit set */
+       u32 flags;
+
+       flags = (PIPE_CONTROL_CS_STALL |
+                PIPE_CONTROL_TILE_CACHE_FLUSH |
+                PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
+                PIPE_CONTROL_DEPTH_CACHE_FLUSH |
+                PIPE_CONTROL_DC_FLUSH_ENABLE |
+                PIPE_CONTROL_FLUSH_ENABLE);
+
+       if (XE_WA(gt, 1409600907)) /* out-of-band workaround; its match rule lives in xe_wa_oob.rules */
+               flags |= PIPE_CONTROL_DEPTH_STALL;
+
+       if (lacks_render)
+               flags &= ~PIPE_CONTROL_3D_ARCH_FLAGS; /* NOTE(review): mask is defined outside this view; presumably the 3D-only bits -- confirm */
+       else if (job->engine->class == XE_ENGINE_CLASS_COMPUTE)
+               flags &= ~PIPE_CONTROL_3D_ENGINE_FLAGS; /* NOTE(review): mask is defined outside this view -- confirm which bits it clears */
+
+       dw[i++] = GFX_OP_PIPE_CONTROL(6) | PIPE_CONTROL0_HDC_PIPELINE_FLUSH; /* header dword: the macro encodes the length as (6 - 2) */
+       dw[i++] = flags; /* dword 1: the flag set assembled above */
+       dw[i++] = 0; /* dwords 2-5 are written as zero */
+       dw[i++] = 0;
+       dw[i++] = 0;
+       dw[i++] = 0;
+
+       return i;
+}
+
 static int emit_pipe_imm_ggtt(u32 addr, u32 value, bool stall_only, u32 *dw,
                              int i)
 {
@@ -295,6 +328,8 @@ static void __emit_job_gen12_render_compute(struct xe_sched_job *job,
 
        i = emit_bb_start(batch_addr, ppgtt_flag, dw, i);
 
+       i = emit_render_cache_flush(job, dw, i);
+
        if (job->user_fence.used)
                i = emit_store_imm_ppgtt_posted(job->user_fence.addr,
                                                job->user_fence.value,
index 1ecb10390b280f7b477685a9e5b799848be4b7f9..15c23813398a58ff5e60d0c04bfc90bd4aa80aff 100644 (file)
@@ -14,3 +14,4 @@
                SUBPLATFORM(DG2, G12)
 18020744125    PLATFORM(PVC)
 1509372804     PLATFORM(PVC), GRAPHICS_STEP(A0, C0)
+1409600907     GRAPHICS_VERSION_RANGE(1200, 1250)