drm/xe: Dont skip TLB invalidations on VF
authorTejas Upadhyay <tejas.upadhyay@intel.com>
Thu, 10 Jul 2025 04:59:45 +0000 (10:29 +0530)
committerLucas De Marchi <lucas.demarchi@intel.com>
Thu, 17 Jul 2025 12:45:19 +0000 (09:45 -0300)
Skipping TLB invalidations on VF causing unrecoverable
faults. Probable reason for skipping TLB invalidations
on SRIOV could be lack of support for instruction
MI_FLUSH_DW_STORE_INDEX. Add back TLB flush with some
additional handling.

Helps in resolving,
[  704.913454] xe 0000:00:02.1: [drm:pf_queue_work_func [xe]]
                ASID: 0
                VFID: 0
                PDATA: 0x0d92
                Faulted Address: 0x0000000002fa0000
                FaultType: 0
                AccessType: 1
                FaultLevel: 0
                EngineClass: 3 bcs
                EngineInstance: 8
[  704.913551] xe 0000:00:02.1: [drm:pf_queue_work_func [xe]] Fault response: Unsuccessful -22

V2:
 - Use Xmas tree (MichalW)

Suggested-by: Matthew Brost <matthew.brost@intel.com>
Fixes: 97515d0b3ed92 ("drm/xe/vf: Don't emit access to Global HWSP if VF")
Reviewed-by: Matthew Brost <matthew.brost@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20250710045945.1023840-1-tejas.upadhyay@intel.com
Signed-off-by: Tejas Upadhyay <tejas.upadhyay@intel.com>
(cherry picked from commit b528e896fa570844d654b5a4617a97fa770a1030)
Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
drivers/gpu/drm/xe/xe_ring_ops.c

index bc1689db4cd7165d4a6c52db84af5c693c8a8001..7b50c7c1ee21dac9f9f1bee19b95aa5d94d4ddea 100644 (file)
@@ -110,13 +110,14 @@ static int emit_bb_start(u64 batch_addr, u32 ppgtt_flag, u32 *dw, int i)
        return i;
 }
 
-static int emit_flush_invalidate(u32 *dw, int i)
+static int emit_flush_invalidate(u32 addr, u32 val, u32 *dw, int i)
 {
        dw[i++] = MI_FLUSH_DW | MI_INVALIDATE_TLB | MI_FLUSH_DW_OP_STOREDW |
-                 MI_FLUSH_IMM_DW | MI_FLUSH_DW_STORE_INDEX;
-       dw[i++] = LRC_PPHWSP_FLUSH_INVAL_SCRATCH_ADDR;
-       dw[i++] = 0;
+                 MI_FLUSH_IMM_DW;
+
+       dw[i++] = addr | MI_FLUSH_DW_USE_GTT;
        dw[i++] = 0;
+       dw[i++] = val;
 
        return i;
 }
@@ -397,23 +398,20 @@ static void __emit_job_gen12_render_compute(struct xe_sched_job *job,
 static void emit_migration_job_gen12(struct xe_sched_job *job,
                                     struct xe_lrc *lrc, u32 seqno)
 {
+       u32 saddr = xe_lrc_start_seqno_ggtt_addr(lrc);
        u32 dw[MAX_JOB_SIZE_DW], i = 0;
 
        i = emit_copy_timestamp(lrc, dw, i);
 
-       i = emit_store_imm_ggtt(xe_lrc_start_seqno_ggtt_addr(lrc),
-                               seqno, dw, i);
+       i = emit_store_imm_ggtt(saddr, seqno, dw, i);
 
        dw[i++] = MI_ARB_ON_OFF | MI_ARB_DISABLE; /* Enabled again below */
 
        i = emit_bb_start(job->ptrs[0].batch_addr, BIT(8), dw, i);
 
-       if (!IS_SRIOV_VF(gt_to_xe(job->q->gt))) {
-               /* XXX: Do we need this? Leaving for now. */
-               dw[i++] = preparser_disable(true);
-               i = emit_flush_invalidate(dw, i);
-               dw[i++] = preparser_disable(false);
-       }
+       dw[i++] = preparser_disable(true);
+       i = emit_flush_invalidate(saddr, seqno, dw, i);
+       dw[i++] = preparser_disable(false);
 
        i = emit_bb_start(job->ptrs[1].batch_addr, BIT(8), dw, i);