Merge drm/drm-next into drm-intel-next
[linux-block.git] / drivers / gpu / drm / i915 / gt / intel_workarounds.c
index 3393f77f6726dd242f46cbc678c65d6e7cd5f6ff..ffc80d2e99526b0c65ede26d9781071a904be738 100644 (file)
@@ -167,12 +167,33 @@ static void wa_add(struct i915_wa_list *wal, i915_reg_t reg,
        _wa_add(wal, &wa);
 }
 
+static void wa_mcr_add(struct i915_wa_list *wal, i915_mcr_reg_t reg,
+                      u32 clear, u32 set, u32 read_mask, bool masked_reg)
+{      /* i915_mcr_reg_t counterpart of wa_add(): build one entry and queue it on 'wal' */
+       struct i915_wa wa = {
+               .mcr_reg = reg, /* kept in .mcr_reg; the apply/verify loops use it when .is_mcr is set */
+               .clr  = clear, /* bits removed from the old value when the list is applied */
+               .set  = set, /* bits OR'ed in after the clear step */
+               .read = read_mask, /* callers in this file pass 0 to skip validation of write-only registers */
+               .masked_reg = masked_reg,
+               .is_mcr = 1, /* selects the intel_gt_mcr_*() accessors in wa_list_apply()/wa_list_verify() */
+       };
+
+       _wa_add(wal, &wa);
+}
+
 static void
 wa_write_clr_set(struct i915_wa_list *wal, i915_reg_t reg, u32 clear, u32 set)
 {
        wa_add(wal, reg, clear, set, clear, false);
 }
 
+static void
+wa_mcr_write_clr_set(struct i915_wa_list *wal, i915_mcr_reg_t reg, u32 clear, u32 set)
+{      /* MCR form of wa_write_clr_set(): queue "clear 'clear', then OR in 'set'" on 'reg' */
+       wa_mcr_add(wal, reg, clear, set, clear, false); /* 'clear' is reused as the read mask; not a masked register */
+}
+
 static void
 wa_write(struct i915_wa_list *wal, i915_reg_t reg, u32 set)
 {
@@ -185,12 +206,24 @@ wa_write_or(struct i915_wa_list *wal, i915_reg_t reg, u32 set)
        wa_write_clr_set(wal, reg, set, set);
 }
 
+static void
+wa_mcr_write_or(struct i915_wa_list *wal, i915_mcr_reg_t reg, u32 set)
+{      /* MCR form of wa_write_or() */
+       wa_mcr_write_clr_set(wal, reg, set, set); /* same bits cleared and then set, so they end up set */
+}
+
 static void
 wa_write_clr(struct i915_wa_list *wal, i915_reg_t reg, u32 clr)
 {
        wa_write_clr_set(wal, reg, clr, 0);
 }
 
+static void
+wa_mcr_write_clr(struct i915_wa_list *wal, i915_mcr_reg_t reg, u32 clr)
+{      /* MCR form of wa_write_clr() */
+       wa_mcr_write_clr_set(wal, reg, clr, 0); /* set == 0: the 'clr' bits are cleared and nothing is OR'ed back */
+}
+
 /*
  * WA operations on "masked register". A masked register has the upper 16 bits
  * documented as "masked" in b-spec. Its purpose is to allow writing to just a
@@ -208,12 +241,24 @@ wa_masked_en(struct i915_wa_list *wal, i915_reg_t reg, u32 val)
        wa_add(wal, reg, 0, _MASKED_BIT_ENABLE(val), val, true);
 }
 
+static void
+wa_mcr_masked_en(struct i915_wa_list *wal, i915_mcr_reg_t reg, u32 val)
+{      /* MCR form of wa_masked_en() */
+       wa_mcr_add(wal, reg, 0, _MASKED_BIT_ENABLE(val), val, true); /* clear = 0, read mask = val, masked_reg = true */
+}
+
 static void
 wa_masked_dis(struct i915_wa_list *wal, i915_reg_t reg, u32 val)
 {
        wa_add(wal, reg, 0, _MASKED_BIT_DISABLE(val), val, true);
 }
 
+static void
+wa_mcr_masked_dis(struct i915_wa_list *wal, i915_mcr_reg_t reg, u32 val)
+{      /* MCR form of wa_masked_dis() */
+       wa_mcr_add(wal, reg, 0, _MASKED_BIT_DISABLE(val), val, true); /* clear = 0, read mask = val, masked_reg = true */
+}
+
 static void
 wa_masked_field_set(struct i915_wa_list *wal, i915_reg_t reg,
                    u32 mask, u32 val)
@@ -221,6 +266,13 @@ wa_masked_field_set(struct i915_wa_list *wal, i915_reg_t reg,
        wa_add(wal, reg, 0, _MASKED_FIELD(mask, val), mask, true);
 }
 
+static void
+wa_mcr_masked_field_set(struct i915_wa_list *wal, i915_mcr_reg_t reg,
+                       u32 mask, u32 val)
+{      /* MCR form of wa_masked_field_set() */
+       wa_mcr_add(wal, reg, 0, _MASKED_FIELD(mask, val), mask, true); /* clear = 0, read mask = mask, masked_reg = true */
+}
+
 static void gen6_ctx_workarounds_init(struct intel_engine_cs *engine,
                                      struct i915_wa_list *wal)
 {
@@ -242,8 +294,8 @@ static void gen8_ctx_workarounds_init(struct intel_engine_cs *engine,
        wa_masked_en(wal, RING_MI_MODE(RENDER_RING_BASE), ASYNC_FLIP_PERF_DISABLE);
 
        /* WaDisablePartialInstShootdown:bdw,chv */
-       wa_masked_en(wal, GEN8_ROW_CHICKEN,
-                    PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE);
+       wa_mcr_masked_en(wal, GEN8_ROW_CHICKEN,
+                        PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE);
 
        /* Use Force Non-Coherent whenever executing a 3D context. This is a
         * workaround for a possible hang in the unlikely event a TLB
@@ -289,18 +341,18 @@ static void bdw_ctx_workarounds_init(struct intel_engine_cs *engine,
        gen8_ctx_workarounds_init(engine, wal);
 
        /* WaDisableThreadStallDopClockGating:bdw (pre-production) */
-       wa_masked_en(wal, GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE);
+       wa_mcr_masked_en(wal, GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE);
 
        /* WaDisableDopClockGating:bdw
         *
         * Also see the related UCGTCL1 write in bdw_init_clock_gating()
         * to disable EUTC clock gating.
         */
-       wa_masked_en(wal, GEN7_ROW_CHICKEN2,
-                    DOP_CLOCK_GATING_DISABLE);
+       wa_mcr_masked_en(wal, GEN8_ROW_CHICKEN2,
+                        DOP_CLOCK_GATING_DISABLE);
 
-       wa_masked_en(wal, HALF_SLICE_CHICKEN3,
-                    GEN8_SAMPLER_POWER_BYPASS_DIS);
+       wa_mcr_masked_en(wal, GEN8_HALF_SLICE_CHICKEN3,
+                        GEN8_SAMPLER_POWER_BYPASS_DIS);
 
        wa_masked_en(wal, HDC_CHICKEN0,
                     /* WaForceContextSaveRestoreNonCoherent:bdw */
@@ -315,7 +367,7 @@ static void chv_ctx_workarounds_init(struct intel_engine_cs *engine,
        gen8_ctx_workarounds_init(engine, wal);
 
        /* WaDisableThreadStallDopClockGating:chv */
-       wa_masked_en(wal, GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE);
+       wa_mcr_masked_en(wal, GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE);
 
        /* Improve HiZ throughput on CHV. */
        wa_masked_en(wal, HIZ_CHICKEN, CHV_HZ_8X8_MODE_IN_1X);
@@ -334,21 +386,21 @@ static void gen9_ctx_workarounds_init(struct intel_engine_cs *engine,
                 */
                wa_masked_en(wal, COMMON_SLICE_CHICKEN2,
                             GEN9_PBE_COMPRESSED_HASH_SELECTION);
-               wa_masked_en(wal, GEN9_HALF_SLICE_CHICKEN7,
-                            GEN9_SAMPLER_HASH_COMPRESSED_READ_ADDR);
+               wa_mcr_masked_en(wal, GEN9_HALF_SLICE_CHICKEN7,
+                                GEN9_SAMPLER_HASH_COMPRESSED_READ_ADDR);
        }
 
        /* WaClearFlowControlGpgpuContextSave:skl,bxt,kbl,glk,cfl */
        /* WaDisablePartialInstShootdown:skl,bxt,kbl,glk,cfl */
-       wa_masked_en(wal, GEN8_ROW_CHICKEN,
-                    FLOW_CONTROL_ENABLE |
-                    PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE);
+       wa_mcr_masked_en(wal, GEN8_ROW_CHICKEN,
+                        FLOW_CONTROL_ENABLE |
+                        PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE);
 
        /* WaEnableYV12BugFixInHalfSliceChicken7:skl,bxt,kbl,glk,cfl */
        /* WaEnableSamplerGPGPUPreemptionSupport:skl,bxt,kbl,cfl */
-       wa_masked_en(wal, GEN9_HALF_SLICE_CHICKEN7,
-                    GEN9_ENABLE_YV12_BUGFIX |
-                    GEN9_ENABLE_GPGPU_PREEMPTION);
+       wa_mcr_masked_en(wal, GEN9_HALF_SLICE_CHICKEN7,
+                        GEN9_ENABLE_YV12_BUGFIX |
+                        GEN9_ENABLE_GPGPU_PREEMPTION);
 
        /* Wa4x4STCOptimizationDisable:skl,bxt,kbl,glk,cfl */
        /* WaDisablePartialResolveInVc:skl,bxt,kbl,cfl */
@@ -357,8 +409,8 @@ static void gen9_ctx_workarounds_init(struct intel_engine_cs *engine,
                     GEN9_PARTIAL_RESOLVE_IN_VC_DISABLE);
 
        /* WaCcsTlbPrefetchDisable:skl,bxt,kbl,glk,cfl */
-       wa_masked_dis(wal, GEN9_HALF_SLICE_CHICKEN5,
-                     GEN9_CCS_TLB_PREFETCH_ENABLE);
+       wa_mcr_masked_dis(wal, GEN9_HALF_SLICE_CHICKEN5,
+                         GEN9_CCS_TLB_PREFETCH_ENABLE);
 
        /* WaForceContextSaveRestoreNonCoherent:skl,bxt,kbl,cfl */
        wa_masked_en(wal, HDC_CHICKEN0,
@@ -387,11 +439,11 @@ static void gen9_ctx_workarounds_init(struct intel_engine_cs *engine,
            IS_KABYLAKE(i915) ||
            IS_COFFEELAKE(i915) ||
            IS_COMETLAKE(i915))
-               wa_masked_en(wal, HALF_SLICE_CHICKEN3,
-                            GEN8_SAMPLER_POWER_BYPASS_DIS);
+               wa_mcr_masked_en(wal, GEN8_HALF_SLICE_CHICKEN3,
+                                GEN8_SAMPLER_POWER_BYPASS_DIS);
 
        /* WaDisableSTUnitPowerOptimization:skl,bxt,kbl,glk,cfl */
-       wa_masked_en(wal, HALF_SLICE_CHICKEN2, GEN8_ST_PO_DISABLE);
+       wa_mcr_masked_en(wal, HALF_SLICE_CHICKEN2, GEN8_ST_PO_DISABLE);
 
        /*
         * Supporting preemption with fine-granularity requires changes in the
@@ -470,8 +522,8 @@ static void bxt_ctx_workarounds_init(struct intel_engine_cs *engine,
        gen9_ctx_workarounds_init(engine, wal);
 
        /* WaDisableThreadStallDopClockGating:bxt */
-       wa_masked_en(wal, GEN8_ROW_CHICKEN,
-                    STALL_DOP_GATING_DISABLE);
+       wa_mcr_masked_en(wal, GEN8_ROW_CHICKEN,
+                        STALL_DOP_GATING_DISABLE);
 
        /* WaToEnableHwFixForPushConstHWBug:bxt */
        wa_masked_en(wal, COMMON_SLICE_CHICKEN2,
@@ -491,8 +543,8 @@ static void kbl_ctx_workarounds_init(struct intel_engine_cs *engine,
                             GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
 
        /* WaDisableSbeCacheDispatchPortSharing:kbl */
-       wa_masked_en(wal, GEN7_HALF_SLICE_CHICKEN1,
-                    GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE);
+       wa_mcr_masked_en(wal, GEN8_HALF_SLICE_CHICKEN1,
+                        GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE);
 }
 
 static void glk_ctx_workarounds_init(struct intel_engine_cs *engine,
@@ -515,8 +567,8 @@ static void cfl_ctx_workarounds_init(struct intel_engine_cs *engine,
                     GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
 
        /* WaDisableSbeCacheDispatchPortSharing:cfl */
-       wa_masked_en(wal, GEN7_HALF_SLICE_CHICKEN1,
-                    GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE);
+       wa_mcr_masked_en(wal, GEN8_HALF_SLICE_CHICKEN1,
+                        GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE);
 }
 
 static void icl_ctx_workarounds_init(struct intel_engine_cs *engine,
@@ -535,13 +587,13 @@ static void icl_ctx_workarounds_init(struct intel_engine_cs *engine,
         * (the register is whitelisted in hardware now, so UMDs can opt in
         * for coherency if they have a good reason).
         */
-       wa_masked_en(wal, ICL_HDC_MODE, HDC_FORCE_NON_COHERENT);
+       wa_mcr_masked_en(wal, ICL_HDC_MODE, HDC_FORCE_NON_COHERENT);
 
        /* WaEnableFloatBlendOptimization:icl */
-       wa_add(wal, GEN10_CACHE_MODE_SS, 0,
-              _MASKED_BIT_ENABLE(FLOAT_BLEND_OPTIMIZATION_ENABLE),
-              0 /* write-only, so skip validation */,
-              true);
+       wa_mcr_add(wal, GEN10_CACHE_MODE_SS, 0,
+                  _MASKED_BIT_ENABLE(FLOAT_BLEND_OPTIMIZATION_ENABLE),
+                  0 /* write-only, so skip validation */,
+                  true);
 
        /* WaDisableGPGPUMidThreadPreemption:icl */
        wa_masked_field_set(wal, GEN8_CS_CHICKEN1,
@@ -549,8 +601,8 @@ static void icl_ctx_workarounds_init(struct intel_engine_cs *engine,
                            GEN9_PREEMPT_GPGPU_THREAD_GROUP_LEVEL);
 
        /* allow headerless messages for preemptible GPGPU context */
-       wa_masked_en(wal, GEN10_SAMPLER_MODE,
-                    GEN11_SAMPLER_ENABLE_HEADLESS_MSG);
+       wa_mcr_masked_en(wal, GEN10_SAMPLER_MODE,
+                        GEN11_SAMPLER_ENABLE_HEADLESS_MSG);
 
        /* Wa_1604278689:icl,ehl */
        wa_write(wal, IVB_FBC_RT_BASE, 0xFFFFFFFF & ~ILK_FBC_RT_VALID);
@@ -559,7 +611,7 @@ static void icl_ctx_workarounds_init(struct intel_engine_cs *engine,
                         0xFFFFFFFF);
 
        /* Wa_1406306137:icl,ehl */
-       wa_masked_en(wal, GEN9_ROW_CHICKEN4, GEN11_DIS_PICK_2ND_EU);
+       wa_mcr_masked_en(wal, GEN9_ROW_CHICKEN4, GEN11_DIS_PICK_2ND_EU);
 }
 
 /*
@@ -570,13 +622,13 @@ static void dg2_ctx_gt_tuning_init(struct intel_engine_cs *engine,
                                   struct i915_wa_list *wal)
 {
        wa_masked_en(wal, CHICKEN_RASTER_2, TBIMR_FAST_CLIP);
-       wa_write_clr_set(wal, GEN11_L3SQCREG5, L3_PWM_TIMER_INIT_VAL_MASK,
-                        REG_FIELD_PREP(L3_PWM_TIMER_INIT_VAL_MASK, 0x7f));
-       wa_add(wal,
-              FF_MODE2,
-              FF_MODE2_TDS_TIMER_MASK,
-              FF_MODE2_TDS_TIMER_128,
-              0, false);
+       wa_mcr_write_clr_set(wal, XEHP_L3SQCREG5, L3_PWM_TIMER_INIT_VAL_MASK,
+                            REG_FIELD_PREP(L3_PWM_TIMER_INIT_VAL_MASK, 0x7f));
+       wa_mcr_add(wal,
+                  XEHP_FF_MODE2,
+                  FF_MODE2_TDS_TIMER_MASK,
+                  FF_MODE2_TDS_TIMER_128,
+                  0, false);
 }
 
 /*
@@ -600,7 +652,7 @@ static void gen12_ctx_gt_tuning_init(struct intel_engine_cs *engine,
         * verification is ignored.
         */
        wa_add(wal,
-              FF_MODE2,
+              GEN12_FF_MODE2,
               FF_MODE2_TDS_TIMER_MASK,
               FF_MODE2_TDS_TIMER_128,
               0, false);
@@ -609,6 +661,8 @@ static void gen12_ctx_gt_tuning_init(struct intel_engine_cs *engine,
 static void gen12_ctx_workarounds_init(struct intel_engine_cs *engine,
                                       struct i915_wa_list *wal)
 {
+       struct drm_i915_private *i915 = engine->i915;
+
        gen12_ctx_gt_tuning_init(engine, wal);
 
        /*
@@ -638,10 +692,14 @@ static void gen12_ctx_workarounds_init(struct intel_engine_cs *engine,
         * to Wa_1608008084.
         */
        wa_add(wal,
-              FF_MODE2,
+              GEN12_FF_MODE2,
               FF_MODE2_GS_TIMER_MASK,
               FF_MODE2_GS_TIMER_224,
               0, false);
+
+       if (!IS_DG1(i915))
+               /* Wa_1806527549 */
+               wa_masked_en(wal, HIZ_CHICKEN, HZ_DEPTH_TEST_LE_GE_OPT_DISABLE);
 }
 
 static void dg1_ctx_workarounds_init(struct intel_engine_cs *engine,
@@ -665,27 +723,27 @@ static void dg2_ctx_workarounds_init(struct intel_engine_cs *engine,
 
        /* Wa_16011186671:dg2_g11 */
        if (IS_DG2_GRAPHICS_STEP(engine->i915, G11, STEP_A0, STEP_B0)) {
-               wa_masked_dis(wal, VFLSKPD, DIS_MULT_MISS_RD_SQUASH);
-               wa_masked_en(wal, VFLSKPD, DIS_OVER_FETCH_CACHE);
+               wa_mcr_masked_dis(wal, VFLSKPD, DIS_MULT_MISS_RD_SQUASH);
+               wa_mcr_masked_en(wal, VFLSKPD, DIS_OVER_FETCH_CACHE);
        }
 
        if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_A0, STEP_B0)) {
                /* Wa_14010469329:dg2_g10 */
-               wa_masked_en(wal, GEN11_COMMON_SLICE_CHICKEN3,
-                            XEHP_DUAL_SIMD8_SEQ_MERGE_DISABLE);
+               wa_mcr_masked_en(wal, XEHP_COMMON_SLICE_CHICKEN3,
+                                XEHP_DUAL_SIMD8_SEQ_MERGE_DISABLE);
 
                /*
                 * Wa_22010465075:dg2_g10
                 * Wa_22010613112:dg2_g10
                 * Wa_14010698770:dg2_g10
                 */
-               wa_masked_en(wal, GEN11_COMMON_SLICE_CHICKEN3,
-                            GEN12_DISABLE_CPS_AWARE_COLOR_PIPE);
+               wa_mcr_masked_en(wal, XEHP_COMMON_SLICE_CHICKEN3,
+                                GEN12_DISABLE_CPS_AWARE_COLOR_PIPE);
        }
 
        /* Wa_16013271637:dg2 */
-       wa_masked_en(wal, SLICE_COMMON_ECO_CHICKEN1,
-                    MSC_MSAA_REODER_BUF_BYPASS_DISABLE);
+       wa_mcr_masked_en(wal, XEHP_SLICE_COMMON_ECO_CHICKEN1,
+                        MSC_MSAA_REODER_BUF_BYPASS_DISABLE);
 
        /* Wa_14014947963:dg2 */
        if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_B0, STEP_FOREVER) ||
@@ -1077,18 +1135,23 @@ static void __set_mcr_steering(struct i915_wa_list *wal,
        wa_write_clr_set(wal, steering_reg, mcr_mask, mcr);
 }
 
-static void __add_mcr_wa(struct intel_gt *gt, struct i915_wa_list *wal,
-                        unsigned int slice, unsigned int subslice)
+static void debug_dump_steering(struct intel_gt *gt)
 {
        struct drm_printer p = drm_debug_printer("MCR Steering:");
 
+       if (drm_debug_enabled(DRM_UT_DRIVER)) /* report only when DRM_UT_DRIVER debugging is enabled */
+               intel_gt_mcr_report_steering(&p, gt, false);
+}
+
+static void __add_mcr_wa(struct intel_gt *gt, struct i915_wa_list *wal,
+                        unsigned int slice, unsigned int subslice)
+{
        __set_mcr_steering(wal, GEN8_MCR_SELECTOR, slice, subslice);
 
        gt->default_steering.groupid = slice;
        gt->default_steering.instanceid = subslice;
 
-       if (drm_debug_enabled(DRM_UT_DRIVER))
-               intel_gt_mcr_report_steering(&p, gt, false);
+       debug_dump_steering(gt); /* called after gt->default_steering has been updated above */
 }
 
 static void
@@ -1182,6 +1245,9 @@ xehp_init_mcr(struct intel_gt *gt, struct i915_wa_list *wal)
                gt->steering_table[MSLICE] = NULL;
        }
 
+       if (IS_XEHPSDV(gt->i915) && slice_mask & BIT(0))
+               gt->steering_table[GAM] = NULL;
+
        slice = __ffs(slice_mask);
        subslice = intel_sseu_find_first_xehp_dss(sseu, GEN_DSS_PER_GSLICE, slice) %
                GEN_DSS_PER_GSLICE;
@@ -1199,6 +1265,13 @@ xehp_init_mcr(struct intel_gt *gt, struct i915_wa_list *wal)
         */
        __set_mcr_steering(wal, MCFG_MCR_SELECTOR, 0, 2);
        __set_mcr_steering(wal, SF_MCR_SELECTOR, 0, 2);
+
+       /*
+        * On DG2, GAM registers have a dedicated steering control register
+        * and must always be programmed to a hardcoded groupid of "1."
+        */
+       if (IS_DG2(gt->i915))
+               __set_mcr_steering(wal, GAM_MCR_SELECTOR, 1, 0);
 }
 
 static void
@@ -1255,22 +1328,22 @@ icl_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
                    PSDUNIT_CLKGATE_DIS);
 
        /* Wa_1406680159:icl,ehl */
-       wa_write_or(wal,
-                   SUBSLICE_UNIT_LEVEL_CLKGATE,
-                   GWUNIT_CLKGATE_DIS);
+       wa_mcr_write_or(wal,
+                       GEN11_SUBSLICE_UNIT_LEVEL_CLKGATE,
+                       GWUNIT_CLKGATE_DIS);
 
        /* Wa_1607087056:icl,ehl,jsl */
        if (IS_ICELAKE(i915) ||
            IS_JSL_EHL_GRAPHICS_STEP(i915, STEP_A0, STEP_B0))
                wa_write_or(wal,
-                           SLICE_UNIT_LEVEL_CLKGATE,
+                           GEN11_SLICE_UNIT_LEVEL_CLKGATE,
                            L3_CLKGATE_DIS | L3_CR2X_CLKGATE_DIS);
 
        /*
         * This is not a documented workaround, but rather an optimization
         * to reduce sampler power.
         */
-       wa_write_clr(wal, GEN10_DFR_RATIO_EN_AND_CHICKEN, DFR_DISABLE);
+       wa_mcr_write_clr(wal, GEN10_DFR_RATIO_EN_AND_CHICKEN, DFR_DISABLE);
 }
 
 /*
@@ -1304,7 +1377,7 @@ gen12_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
        wa_14011060649(gt, wal);
 
        /* Wa_14011059788:tgl,rkl,adl-s,dg1,adl-p */
-       wa_write_or(wal, GEN10_DFR_RATIO_EN_AND_CHICKEN, DFR_DISABLE);
+       wa_mcr_write_or(wal, GEN10_DFR_RATIO_EN_AND_CHICKEN, DFR_DISABLE);
 }
 
 static void
@@ -1316,14 +1389,14 @@ tgl_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
 
        /* Wa_1409420604:tgl */
        if (IS_TGL_UY_GRAPHICS_STEP(i915, STEP_A0, STEP_B0))
-               wa_write_or(wal,
-                           SUBSLICE_UNIT_LEVEL_CLKGATE2,
-                           CPSSUNIT_CLKGATE_DIS);
+               wa_mcr_write_or(wal,
+                               SUBSLICE_UNIT_LEVEL_CLKGATE2,
+                               CPSSUNIT_CLKGATE_DIS);
 
        /* Wa_1607087056:tgl also know as BUG:1409180338 */
        if (IS_TGL_UY_GRAPHICS_STEP(i915, STEP_A0, STEP_B0))
                wa_write_or(wal,
-                           SLICE_UNIT_LEVEL_CLKGATE,
+                           GEN11_SLICE_UNIT_LEVEL_CLKGATE,
                            L3_CLKGATE_DIS | L3_CR2X_CLKGATE_DIS);
 
        /* Wa_1408615072:tgl[a0] */
@@ -1342,14 +1415,14 @@ dg1_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
        /* Wa_1607087056:dg1 */
        if (IS_DG1_GRAPHICS_STEP(i915, STEP_A0, STEP_B0))
                wa_write_or(wal,
-                           SLICE_UNIT_LEVEL_CLKGATE,
+                           GEN11_SLICE_UNIT_LEVEL_CLKGATE,
                            L3_CLKGATE_DIS | L3_CR2X_CLKGATE_DIS);
 
        /* Wa_1409420604:dg1 */
        if (IS_DG1(i915))
-               wa_write_or(wal,
-                           SUBSLICE_UNIT_LEVEL_CLKGATE2,
-                           CPSSUNIT_CLKGATE_DIS);
+               wa_mcr_write_or(wal,
+                               SUBSLICE_UNIT_LEVEL_CLKGATE2,
+                               CPSSUNIT_CLKGATE_DIS);
 
        /* Wa_1408615072:dg1 */
        /* Empirical testing shows this register is unaffected by engine reset. */
@@ -1366,7 +1439,7 @@ xehpsdv_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
        xehp_init_mcr(gt, wal);
 
        /* Wa_1409757795:xehpsdv */
-       wa_write_or(wal, SCCGCTL94DC, CG3DDISURB);
+       wa_mcr_write_or(wal, SCCGCTL94DC, CG3DDISURB);
 
        /* Wa_16011155590:xehpsdv */
        if (IS_XEHPSDV_GRAPHICS_STEP(i915, STEP_A0, STEP_B0))
@@ -1446,8 +1519,8 @@ dg2_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
                            CG3DDISCFEG_CLKGATE_DIS);
 
                /* Wa_14011006942:dg2 */
-               wa_write_or(wal, SUBSLICE_UNIT_LEVEL_CLKGATE,
-                           DSS_ROUTER_CLKGATE_DIS);
+               wa_mcr_write_or(wal, GEN11_SUBSLICE_UNIT_LEVEL_CLKGATE,
+                               DSS_ROUTER_CLKGATE_DIS);
        }
 
        if (IS_DG2_GRAPHICS_STEP(gt->i915, G10, STEP_A0, STEP_B0)) {
@@ -1458,7 +1531,7 @@ dg2_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
                wa_write_or(wal, UNSLCGCTL9444, LTCDD_CLKGATE_DIS);
 
                /* Wa_14011371254:dg2_g10 */
-               wa_write_or(wal, SLICE_UNIT_LEVEL_CLKGATE, NODEDSS_CLKGATE_DIS);
+               wa_mcr_write_or(wal, XEHP_SLICE_UNIT_LEVEL_CLKGATE, NODEDSS_CLKGATE_DIS);
 
                /* Wa_14011431319:dg2_g10 */
                wa_write_or(wal, UNSLCGCTL9440, GAMTLBOACS_CLKGATE_DIS |
@@ -1494,21 +1567,21 @@ dg2_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
                            GAMEDIA_CLKGATE_DIS);
 
                /* Wa_14011028019:dg2_g10 */
-               wa_write_or(wal, SSMCGCTL9530, RTFUNIT_CLKGATE_DIS);
+               wa_mcr_write_or(wal, SSMCGCTL9530, RTFUNIT_CLKGATE_DIS);
        }
 
        /* Wa_14014830051:dg2 */
-       wa_write_clr(wal, SARB_CHICKEN1, COMP_CKN_IN);
+       wa_mcr_write_clr(wal, SARB_CHICKEN1, COMP_CKN_IN);
 
        /*
         * The following are not actually "workarounds" but rather
         * recommended tuning settings documented in the bspec's
         * performance guide section.
         */
-       wa_write_or(wal, GEN12_SQCM, EN_32B_ACCESS);
+       wa_mcr_write_or(wal, XEHP_SQCM, EN_32B_ACCESS);
 
        /* Wa_14015795083 */
-       wa_write_clr(wal, GEN7_MISCCPCTL, GEN12_DOP_CLOCK_GATE_RENDER_ENABLE);
+       wa_mcr_write_clr(wal, GEN8_MISCCPCTL, GEN12_DOP_CLOCK_GATE_RENDER_ENABLE);
 }
 
 static void
@@ -1517,7 +1590,27 @@ pvc_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
        pvc_init_mcr(gt, wal);
 
        /* Wa_14015795083 */
-       wa_write_clr(wal, GEN7_MISCCPCTL, GEN12_DOP_CLOCK_GATE_RENDER_ENABLE);
+       wa_mcr_write_clr(wal, GEN8_MISCCPCTL, GEN12_DOP_CLOCK_GATE_RENDER_ENABLE);
+}
+
+static void
+xelpg_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
+{
+       /* FIXME: Actual workarounds will be added in future patch(es); 'wal' is not populated yet */
+
+       /*
+        * Unlike older platforms, we no longer set up implicit steering here;
+        * all MCR accesses are explicitly steered.
+        */
+       debug_dump_steering(gt);
+}
+
+static void
+xelpmp_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
+{
+       /* FIXME: Actual workarounds will be added in future patch(es); 'wal' is not populated yet */
+
+       debug_dump_steering(gt); /* currently the only action taken for this GT */
 }
 
 static void
@@ -1525,7 +1618,18 @@ gt_init_workarounds(struct intel_gt *gt, struct i915_wa_list *wal)
 {
        struct drm_i915_private *i915 = gt->i915;
 
-       if (IS_PONTEVECCHIO(i915))
+       if (gt->type == GT_MEDIA) {
+               if (MEDIA_VER(i915) >= 13)
+                       xelpmp_gt_workarounds_init(gt, wal);
+               else
+                       MISSING_CASE(MEDIA_VER(i915));
+
+               return;
+       }
+
+       if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70))
+               xelpg_gt_workarounds_init(gt, wal);
+       else if (IS_PONTEVECCHIO(i915))
                pvc_gt_workarounds_init(gt, wal);
        else if (IS_DG2(i915))
                dg2_gt_workarounds_init(gt, wal);
@@ -1629,14 +1733,25 @@ wa_list_apply(struct intel_gt *gt, const struct i915_wa_list *wal)
                u32 val, old = 0;
 
                /* open-coded rmw due to steering */
-               old = wa->clr ? intel_gt_mcr_read_any_fw(gt, wa->reg) : 0;
+               if (wa->clr)
+                       old = wa->is_mcr ?
+                               intel_gt_mcr_read_any_fw(gt, wa->mcr_reg) :
+                               intel_uncore_read_fw(uncore, wa->reg);
                val = (old & ~wa->clr) | wa->set;
-               if (val != old || !wa->clr)
-                       intel_uncore_write_fw(uncore, wa->reg, val);
+               if (val != old || !wa->clr) {
+                       if (wa->is_mcr)
+                               intel_gt_mcr_multicast_write_fw(gt, wa->mcr_reg, val);
+                       else
+                               intel_uncore_write_fw(uncore, wa->reg, val);
+               }
 
-               if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
-                       wa_verify(wa, intel_gt_mcr_read_any_fw(gt, wa->reg),
-                                 wal->name, "application");
+               if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) {
+                       u32 val = wa->is_mcr ?
+                               intel_gt_mcr_read_any_fw(gt, wa->mcr_reg) :
+                               intel_uncore_read_fw(uncore, wa->reg);
+
+                       wa_verify(wa, val, wal->name, "application");
+               }
        }
 
        intel_uncore_forcewake_put__locked(uncore, fw);
@@ -1665,8 +1780,9 @@ static bool wa_list_verify(struct intel_gt *gt,
        intel_uncore_forcewake_get__locked(uncore, fw);
 
        for (i = 0, wa = wal->list; i < wal->count; i++, wa++)
-               ok &= wa_verify(wa,
-                               intel_gt_mcr_read_any_fw(gt, wa->reg),
+               ok &= wa_verify(wa, wa->is_mcr ?
+                               intel_gt_mcr_read_any_fw(gt, wa->mcr_reg) :
+                               intel_uncore_read_fw(uncore, wa->reg),
                                wal->name, from);
 
        intel_uncore_forcewake_put__locked(uncore, fw);
@@ -1712,12 +1828,36 @@ whitelist_reg_ext(struct i915_wa_list *wal, i915_reg_t reg, u32 flags)
        _wa_add(wal, &wa);
 }
 
+static void
+whitelist_mcr_reg_ext(struct i915_wa_list *wal, i915_mcr_reg_t reg, u32 flags)
+{      /* i915_mcr_reg_t counterpart of whitelist_reg_ext() */
+       struct i915_wa wa = {
+               .mcr_reg = reg,
+               .is_mcr = 1, /* marks the entry as using .mcr_reg rather than .reg */
+       };
+
+       if (GEM_DEBUG_WARN_ON(wal->count >= RING_MAX_NONPRIV_SLOTS)) /* no free slot left in the list */
+               return; /* NOTE(review): confirm this still triggers when GEM debugging is compiled out */
+
+       if (GEM_DEBUG_WARN_ON(!is_nonpriv_flags_valid(flags))) /* flags rejected by is_nonpriv_flags_valid() */
+               return;
+
+       wa.mcr_reg.reg |= flags; /* the access flags are OR'ed into the register value itself */
+       _wa_add(wal, &wa);
+}
+
 static void
 whitelist_reg(struct i915_wa_list *wal, i915_reg_t reg)
 {
        whitelist_reg_ext(wal, reg, RING_FORCE_TO_NONPRIV_ACCESS_RW);
 }
 
+static void
+whitelist_mcr_reg(struct i915_wa_list *wal, i915_mcr_reg_t reg)
+{      /* MCR form of whitelist_reg(): passes the same RING_FORCE_TO_NONPRIV_ACCESS_RW flags */
+       whitelist_mcr_reg_ext(wal, reg, RING_FORCE_TO_NONPRIV_ACCESS_RW);
+}
+
 static void gen9_whitelist_build(struct i915_wa_list *w)
 {
        /* WaVFEStateAfterPipeControlwithMediaStateClear:skl,bxt,glk,cfl */
@@ -1743,7 +1883,7 @@ static void skl_whitelist_build(struct intel_engine_cs *engine)
        gen9_whitelist_build(w);
 
        /* WaDisableLSQCROPERFforOCL:skl */
-       whitelist_reg(w, GEN8_L3SQCREG4);
+       whitelist_mcr_reg(w, GEN8_L3SQCREG4);
 }
 
 static void bxt_whitelist_build(struct intel_engine_cs *engine)
@@ -1764,7 +1904,7 @@ static void kbl_whitelist_build(struct intel_engine_cs *engine)
        gen9_whitelist_build(w);
 
        /* WaDisableLSQCROPERFforOCL:kbl */
-       whitelist_reg(w, GEN8_L3SQCREG4);
+       whitelist_mcr_reg(w, GEN8_L3SQCREG4);
 }
 
 static void glk_whitelist_build(struct intel_engine_cs *engine)
@@ -1829,10 +1969,10 @@ static void icl_whitelist_build(struct intel_engine_cs *engine)
        switch (engine->class) {
        case RENDER_CLASS:
                /* WaAllowUMDToModifyHalfSliceChicken7:icl */
-               whitelist_reg(w, GEN9_HALF_SLICE_CHICKEN7);
+               whitelist_mcr_reg(w, GEN9_HALF_SLICE_CHICKEN7);
 
                /* WaAllowUMDToModifySamplerMode:icl */
-               whitelist_reg(w, GEN10_SAMPLER_MODE);
+               whitelist_mcr_reg(w, GEN10_SAMPLER_MODE);
 
                /* WaEnableStateCacheRedirectToCS:icl */
                whitelist_reg(w, GEN9_SLICE_COMMON_ECO_CHICKEN1);
@@ -2108,24 +2248,21 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
 
        if (IS_DG2_GRAPHICS_STEP(i915, G11, STEP_A0, STEP_B0)) {
                /* Wa_14013392000:dg2_g11 */
-               wa_masked_en(wal, GEN7_ROW_CHICKEN2, GEN12_ENABLE_LARGE_GRF_MODE);
-
-               /* Wa_16011620976:dg2_g11 */
-               wa_write_or(wal, LSC_CHICKEN_BIT_0_UDW, DIS_CHAIN_2XSIMD8);
+               wa_mcr_masked_en(wal, GEN8_ROW_CHICKEN2, GEN12_ENABLE_LARGE_GRF_MODE);
        }
 
        if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_B0, STEP_FOREVER) ||
            IS_DG2_G11(i915) || IS_DG2_G12(i915)) {
                /* Wa_1509727124:dg2 */
-               wa_masked_en(wal, GEN10_SAMPLER_MODE,
-                            SC_DISABLE_POWER_OPTIMIZATION_EBB);
+               wa_mcr_masked_en(wal, GEN10_SAMPLER_MODE,
+                                SC_DISABLE_POWER_OPTIMIZATION_EBB);
        }
 
        if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_A0, STEP_B0) ||
            IS_DG2_GRAPHICS_STEP(i915, G11, STEP_A0, STEP_B0)) {
                /* Wa_14012419201:dg2 */
-               wa_masked_en(wal, GEN9_ROW_CHICKEN4,
-                            GEN12_DISABLE_HDR_PAST_PAYLOAD_HOLD_FIX);
+               wa_mcr_masked_en(wal, GEN9_ROW_CHICKEN4,
+                                GEN12_DISABLE_HDR_PAST_PAYLOAD_HOLD_FIX);
        }
 
        if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_B0, STEP_C0) ||
@@ -2134,13 +2271,13 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
                 * Wa_22012826095:dg2
                 * Wa_22013059131:dg2
                 */
-               wa_write_clr_set(wal, LSC_CHICKEN_BIT_0_UDW,
-                                MAXREQS_PER_BANK,
-                                REG_FIELD_PREP(MAXREQS_PER_BANK, 2));
+               wa_mcr_write_clr_set(wal, LSC_CHICKEN_BIT_0_UDW,
+                                    MAXREQS_PER_BANK,
+                                    REG_FIELD_PREP(MAXREQS_PER_BANK, 2));
 
                /* Wa_22013059131:dg2 */
-               wa_write_or(wal, LSC_CHICKEN_BIT_0,
-                           FORCE_1_SUB_MESSAGE_PER_FRAGMENT);
+               wa_mcr_write_or(wal, LSC_CHICKEN_BIT_0,
+                               FORCE_1_SUB_MESSAGE_PER_FRAGMENT);
        }
 
        /* Wa_1308578152:dg2_g10 when first gslice is fused off */
@@ -2153,19 +2290,19 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
        if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_B0, STEP_FOREVER) ||
            IS_DG2_G11(i915) || IS_DG2_G12(i915)) {
                /* Wa_22013037850:dg2 */
-               wa_write_or(wal, LSC_CHICKEN_BIT_0_UDW,
-                           DISABLE_128B_EVICTION_COMMAND_UDW);
+               wa_mcr_write_or(wal, LSC_CHICKEN_BIT_0_UDW,
+                               DISABLE_128B_EVICTION_COMMAND_UDW);
 
                /* Wa_22012856258:dg2 */
-               wa_masked_en(wal, GEN7_ROW_CHICKEN2,
-                            GEN12_DISABLE_READ_SUPPRESSION);
+               wa_mcr_masked_en(wal, GEN8_ROW_CHICKEN2,
+                                GEN12_DISABLE_READ_SUPPRESSION);
 
                /*
                 * Wa_22010960976:dg2
                 * Wa_14013347512:dg2
                 */
-               wa_masked_dis(wal, GEN12_HDC_CHICKEN0,
-                             LSC_L1_FLUSH_CTL_3D_DATAPORT_FLUSH_EVENTS_MASK);
+               wa_mcr_masked_dis(wal, XEHP_HDC_CHICKEN0,
+                                 LSC_L1_FLUSH_CTL_3D_DATAPORT_FLUSH_EVENTS_MASK);
        }
 
        if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_A0, STEP_B0)) {
@@ -2173,8 +2310,8 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
                 * Wa_1608949956:dg2_g10
                 * Wa_14010198302:dg2_g10
                 */
-               wa_masked_en(wal, GEN8_ROW_CHICKEN,
-                            MDQ_ARBITRATION_MODE | UGM_BACKUP_MODE);
+               wa_mcr_masked_en(wal, GEN8_ROW_CHICKEN,
+                                MDQ_ARBITRATION_MODE | UGM_BACKUP_MODE);
 
                /*
                 * Wa_14010918519:dg2_g10
@@ -2182,31 +2319,31 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
                 * LSC_CHICKEN_BIT_0 always reads back as 0 in this stepping,
                 * so ignoring verification.
                 */
-               wa_add(wal, LSC_CHICKEN_BIT_0_UDW, 0,
-                      FORCE_SLM_FENCE_SCOPE_TO_TILE | FORCE_UGM_FENCE_SCOPE_TO_TILE,
-                      0, false);
+               wa_mcr_add(wal, LSC_CHICKEN_BIT_0_UDW, 0,
+                          FORCE_SLM_FENCE_SCOPE_TO_TILE | FORCE_UGM_FENCE_SCOPE_TO_TILE,
+                          0, false);
        }
 
        if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_A0, STEP_B0)) {
                /* Wa_22010430635:dg2 */
-               wa_masked_en(wal,
-                            GEN9_ROW_CHICKEN4,
-                            GEN12_DISABLE_GRF_CLEAR);
+               wa_mcr_masked_en(wal,
+                                GEN9_ROW_CHICKEN4,
+                                GEN12_DISABLE_GRF_CLEAR);
 
                /* Wa_14010648519:dg2 */
-               wa_write_or(wal, XEHP_L3NODEARBCFG, XEHP_LNESPARE);
+               wa_mcr_write_or(wal, XEHP_L3NODEARBCFG, XEHP_LNESPARE);
        }
 
        /* Wa_14013202645:dg2 */
        if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_B0, STEP_C0) ||
            IS_DG2_GRAPHICS_STEP(i915, G11, STEP_A0, STEP_B0))
-               wa_write_or(wal, RT_CTRL, DIS_NULL_QUERY);
+               wa_mcr_write_or(wal, RT_CTRL, DIS_NULL_QUERY);
 
        /* Wa_22012532006:dg2 */
        if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_A0, STEP_C0) ||
            IS_DG2_GRAPHICS_STEP(engine->i915, G11, STEP_A0, STEP_B0))
-               wa_masked_en(wal, GEN9_HALF_SLICE_CHICKEN7,
-                            DG2_DISABLE_ROUND_ENABLE_ALLOW_FOR_SSLA);
+               wa_mcr_masked_en(wal, GEN9_HALF_SLICE_CHICKEN7,
+                                DG2_DISABLE_ROUND_ENABLE_ALLOW_FOR_SSLA);
 
        if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_A0, STEP_B0)) {
                /* Wa_14010680813:dg2_g10 */
@@ -2217,17 +2354,16 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
        if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_A0, STEP_B0) ||
            IS_DG2_GRAPHICS_STEP(engine->i915, G11, STEP_A0, STEP_B0)) {
                /* Wa_14012362059:dg2 */
-               wa_write_or(wal, GEN12_MERT_MOD_CTRL, FORCE_MISS_FTLB);
+               wa_mcr_write_or(wal, XEHP_MERT_MOD_CTRL, FORCE_MISS_FTLB);
        }
 
        if (IS_DG2_GRAPHICS_STEP(i915, G11, STEP_B0, STEP_FOREVER) ||
            IS_DG2_G10(i915)) {
                /* Wa_22014600077:dg2 */
-               wa_add(wal, GEN10_CACHE_MODE_SS, 0,
-                      _MASKED_BIT_ENABLE(ENABLE_EU_COUNT_FOR_TDL_FLUSH),
-                      0 /* Wa_14012342262 :write-only reg, so skip
-                           verification */,
-                      true);
+               wa_mcr_add(wal, GEN10_CACHE_MODE_SS, 0,
+                          _MASKED_BIT_ENABLE(ENABLE_EU_COUNT_FOR_TDL_FLUSH),
+                          0 /* Wa_14012342262 write-only reg, so skip verification */,
+                          true);
        }
 
        if (IS_DG1_GRAPHICS_STEP(i915, STEP_A0, STEP_B0) ||
@@ -2254,7 +2390,7 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
        if (IS_ALDERLAKE_P(i915) || IS_ALDERLAKE_S(i915) || IS_DG1(i915) ||
            IS_ROCKETLAKE(i915) || IS_TIGERLAKE(i915)) {
                /* Wa_1606931601:tgl,rkl,dg1,adl-s,adl-p */
-               wa_masked_en(wal, GEN7_ROW_CHICKEN2, GEN12_DISABLE_EARLY_READ);
+               wa_mcr_masked_en(wal, GEN8_ROW_CHICKEN2, GEN12_DISABLE_EARLY_READ);
 
                /*
                 * Wa_1407928979:tgl A*
@@ -2283,14 +2419,14 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
            IS_DG1_GRAPHICS_STEP(i915, STEP_A0, STEP_B0) ||
            IS_ROCKETLAKE(i915) || IS_TIGERLAKE(i915)) {
                /* Wa_1409804808:tgl,rkl,dg1[a0],adl-s,adl-p */
-               wa_masked_en(wal, GEN7_ROW_CHICKEN2,
-                            GEN12_PUSH_CONST_DEREF_HOLD_DIS);
+               wa_mcr_masked_en(wal, GEN8_ROW_CHICKEN2,
+                                GEN12_PUSH_CONST_DEREF_HOLD_DIS);
 
                /*
                 * Wa_1409085225:tgl
                 * Wa_14010229206:tgl,rkl,dg1[a0],adl-s,adl-p
                 */
-               wa_masked_en(wal, GEN9_ROW_CHICKEN4, GEN12_DISABLE_TDL_PUSH);
+               wa_mcr_masked_en(wal, GEN9_ROW_CHICKEN4, GEN12_DISABLE_TDL_PUSH);
        }
 
        if (IS_DG1_GRAPHICS_STEP(i915, STEP_A0, STEP_B0) ||
@@ -2314,9 +2450,9 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
        if (IS_DG1(i915) || IS_ROCKETLAKE(i915) || IS_TIGERLAKE(i915) ||
            IS_ALDERLAKE_S(i915) || IS_ALDERLAKE_P(i915)) {
                /* Wa_1406941453:tgl,rkl,dg1,adl-s,adl-p */
-               wa_masked_en(wal,
-                            GEN10_SAMPLER_MODE,
-                            ENABLE_SMALLPL);
+               wa_mcr_masked_en(wal,
+                                GEN10_SAMPLER_MODE,
+                                ENABLE_SMALLPL);
        }
 
        if (GRAPHICS_VER(i915) == 11) {
@@ -2350,9 +2486,9 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
                 * Wa_1405733216:icl
                 * Formerly known as WaDisableCleanEvicts
                 */
-               wa_write_or(wal,
-                           GEN8_L3SQCREG4,
-                           GEN11_LQSC_CLEAN_EVICT_DISABLE);
+               wa_mcr_write_or(wal,
+                               GEN8_L3SQCREG4,
+                               GEN11_LQSC_CLEAN_EVICT_DISABLE);
 
                /* Wa_1606682166:icl */
                wa_write_or(wal,
@@ -2360,10 +2496,10 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
                            GEN7_DISABLE_SAMPLER_PREFETCH);
 
                /* Wa_1409178092:icl */
-               wa_write_clr_set(wal,
-                                GEN11_SCRATCH2,
-                                GEN11_COHERENT_PARTIAL_WRITE_MERGE_ENABLE,
-                                0);
+               wa_mcr_write_clr_set(wal,
+                                    GEN11_SCRATCH2,
+                                    GEN11_COHERENT_PARTIAL_WRITE_MERGE_ENABLE,
+                                    0);
 
                /* WaEnable32PlaneMode:icl */
                wa_masked_en(wal, GEN9_CSFE_CHICKEN1_RCS,
@@ -2390,12 +2526,64 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
                             FF_DOP_CLOCK_GATE_DISABLE);
        }
 
-       if (HAS_PERCTX_PREEMPT_CTRL(i915)) {
-               /* FtrPerCtxtPreemptionGranularityControl:skl,bxt,kbl,cfl,cnl,icl,tgl */
+       /*
+        * Intel platforms that support fine-grained preemption (i.e., gen9 and
+        * beyond) allow the kernel-mode driver to choose between two different
+        * options for controlling preemption granularity and behavior.
+        *
+        * Option 1 (hardware default):
+        *   Preemption settings are controlled in a global manner via
+        *   kernel-only register CS_DEBUG_MODE1 (0x20EC).  Any granularity
+        *   and settings chosen by the kernel-mode driver will apply to all
+        *   userspace clients.
+        *
+        * Option 2:
+        *   Preemption settings are controlled on a per-context basis via
+        *   register CS_CHICKEN1 (0x2580).  CS_CHICKEN1 is saved/restored on
+        *   context switch and is writable by userspace (e.g., via
+        *   MI_LOAD_REGISTER_IMMEDIATE instructions placed in a batch buffer)
+        *   which allows different userspace drivers/clients to select
+        *   different settings, or to change those settings on the fly in
+        *   response to runtime needs.  This option was known by name
+        *   "FtrPerCtxtPreemptionGranularityControl" at one time, although
+        *   that name is somewhat misleading as other non-granularity
+        *   preemption settings are also impacted by this decision.
+        *
+        * On Linux, our policy has always been to let userspace drivers
+        * control preemption granularity/settings (Option 2).  This was
+        * originally mandatory on gen9 to prevent ABI breakage (old gen9
+        * userspace developed before object-level preemption was enabled would
+        * not behave well if i915 were to go with Option 1 and enable that
+        * preemption in a global manner).  On gen9 each context would have
+        * object-level preemption disabled by default (see
+        * WaDisable3DMidCmdPreemption in gen9_ctx_workarounds_init), but
+        * userspace drivers could opt-in to object-level preemption as they
+        * saw fit.  For post-gen9 platforms, we continue to utilize Option 2;
+        * even though it is no longer necessary for ABI compatibility when
+        * enabling a new platform, it does ensure that userspace will be able
+        * to implement any workarounds that show up requiring temporary
+        * adjustments to preemption behavior at runtime.
+        *
+        * Notes/Workarounds:
+        *  - Wa_14015141709:  On DG2 and early steppings of MTL,
+        *      CS_CHICKEN1[0] does not disable object-level preemption as
+        *      it is supposed to (nor does CS_DEBUG_MODE1[0] if we had been
+        *      using Option 1).  Effectively this means userspace is unable
+        *      to disable object-level preemption on these platforms/steppings
+        *      despite the setting here.
+        *
+        *  - Wa_16013994831:  May require that userspace program
+        *      CS_CHICKEN1[10] when certain runtime conditions are true.
+        *      Userspace requires Option 2 to be in effect for their update of
+        *      CS_CHICKEN1[10] to be effective.
+        *
+        * Other workarounds may appear in the future that will also require
+        * Option 2 behavior to allow proper userspace implementation.
+        */
+       if (GRAPHICS_VER(i915) >= 9)
                wa_masked_en(wal,
                             GEN7_FF_SLICE_CS_CHICKEN1,
                             GEN9_FFSC_PERCTX_PREEMPT_CTRL);
-       }
 
        if (IS_SKYLAKE(i915) ||
            IS_KABYLAKE(i915) ||
@@ -2421,36 +2609,36 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
                             GEN9_PREEMPT_GPGPU_SYNC_SWITCH_DISABLE);
 
                /* WaEnableLbsSlaRetryTimerDecrement:skl,bxt,kbl,glk,cfl */
-               wa_write_or(wal,
-                           BDW_SCRATCH1,
-                           GEN9_LBS_SLA_RETRY_TIMER_DECREMENT_ENABLE);
+               wa_mcr_write_or(wal,
+                               BDW_SCRATCH1,
+                               GEN9_LBS_SLA_RETRY_TIMER_DECREMENT_ENABLE);
 
                /* WaProgramL3SqcReg1DefaultForPerf:bxt,glk */
                if (IS_GEN9_LP(i915))
-                       wa_write_clr_set(wal,
-                                        GEN8_L3SQCREG1,
-                                        L3_PRIO_CREDITS_MASK,
-                                        L3_GENERAL_PRIO_CREDITS(62) |
-                                        L3_HIGH_PRIO_CREDITS(2));
+                       wa_mcr_write_clr_set(wal,
+                                            GEN8_L3SQCREG1,
+                                            L3_PRIO_CREDITS_MASK,
+                                            L3_GENERAL_PRIO_CREDITS(62) |
+                                            L3_HIGH_PRIO_CREDITS(2));
 
                /* WaOCLCoherentLineFlush:skl,bxt,kbl,cfl */
-               wa_write_or(wal,
-                           GEN8_L3SQCREG4,
-                           GEN8_LQSC_FLUSH_COHERENT_LINES);
+               wa_mcr_write_or(wal,
+                               GEN8_L3SQCREG4,
+                               GEN8_LQSC_FLUSH_COHERENT_LINES);
 
                /* Disable atomics in L3 to prevent unrecoverable hangs */
                wa_write_clr_set(wal, GEN9_SCRATCH_LNCF1,
                                 GEN9_LNCF_NONIA_COHERENT_ATOMICS_ENABLE, 0);
-               wa_write_clr_set(wal, GEN8_L3SQCREG4,
-                                GEN8_LQSQ_NONIA_COHERENT_ATOMICS_ENABLE, 0);
-               wa_write_clr_set(wal, GEN9_SCRATCH1,
-                                EVICTION_PERF_FIX_ENABLE, 0);
+               wa_mcr_write_clr_set(wal, GEN8_L3SQCREG4,
+                                    GEN8_LQSQ_NONIA_COHERENT_ATOMICS_ENABLE, 0);
+               wa_mcr_write_clr_set(wal, GEN9_SCRATCH1,
+                                    EVICTION_PERF_FIX_ENABLE, 0);
        }
 
        if (IS_HASWELL(i915)) {
                /* WaSampleCChickenBitEnable:hsw */
                wa_masked_en(wal,
-                            HALF_SLICE_CHICKEN3, HSW_SAMPLE_C_PERFORMANCE);
+                            HSW_HALF_SLICE_CHICKEN3, HSW_SAMPLE_C_PERFORMANCE);
 
                wa_masked_dis(wal,
                              CACHE_MODE_0_GEN7,
@@ -2658,7 +2846,7 @@ ccs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
 {
        if (IS_PVC_CT_STEP(engine->i915, STEP_A0, STEP_C0)) {
                /* Wa_14014999345:pvc */
-               wa_masked_en(wal, GEN10_CACHE_MODE_SS, DISABLE_ECC);
+               wa_mcr_masked_en(wal, GEN10_CACHE_MODE_SS, DISABLE_ECC);
        }
 }
 
@@ -2684,10 +2872,8 @@ add_render_compute_tuning_settings(struct drm_i915_private *i915,
        }
 
        if (IS_DG2(i915)) {
-               wa_write_or(wal, XEHP_L3SCQREG7, BLEND_FILL_CACHING_OPT_DIS);
-               wa_write_clr_set(wal, RT_CTRL, STACKID_CTRL, STACKID_CTRL_512);
-               wa_write_clr_set(wal, DRAW_WATERMARK, VERT_WM_VAL,
-                                REG_FIELD_PREP(VERT_WM_VAL, 0x3FF));
+               wa_mcr_write_or(wal, XEHP_L3SCQREG7, BLEND_FILL_CACHING_OPT_DIS);
+               wa_mcr_write_clr_set(wal, RT_CTRL, STACKID_CTRL, STACKID_CTRL_512);
 
                /*
                 * This is also listed as Wa_22012654132 for certain DG2
@@ -2698,11 +2884,20 @@ add_render_compute_tuning_settings(struct drm_i915_private *i915,
                 * back for verification on DG2 (due to Wa_14012342262), so
                 * we need to explicitly skip the readback.
                 */
-               wa_add(wal, GEN10_CACHE_MODE_SS, 0,
-                      _MASKED_BIT_ENABLE(ENABLE_PREFETCH_INTO_IC),
-                      0 /* write-only, so skip validation */,
-                      true);
+               wa_mcr_add(wal, GEN10_CACHE_MODE_SS, 0,
+                          _MASKED_BIT_ENABLE(ENABLE_PREFETCH_INTO_IC),
+                          0 /* write-only, so skip validation */,
+                          true);
        }
+
+       /*
+        * This tuning setting proves beneficial only on ATS-M designs; the
+        * default "age based" setting is optimal on regular DG2 and other
+        * platforms.
+        */
+       if (INTEL_INFO(i915)->tuning_thread_rr_after_dep)
+               wa_mcr_masked_field_set(wal, GEN9_ROW_CHICKEN4, THREAD_EX_ARB_MODE,
+                                       THREAD_EX_ARB_MODE_RR_AFTER_DEP);
 }
 
 /*
@@ -2728,30 +2923,30 @@ general_render_compute_wa_init(struct intel_engine_cs *engine, struct i915_wa_li
 
        if (IS_XEHPSDV(i915)) {
                /* Wa_1409954639 */
-               wa_masked_en(wal,
-                            GEN8_ROW_CHICKEN,
-                            SYSTOLIC_DOP_CLOCK_GATING_DIS);
+               wa_mcr_masked_en(wal,
+                                GEN8_ROW_CHICKEN,
+                                SYSTOLIC_DOP_CLOCK_GATING_DIS);
 
                /* Wa_1607196519 */
-               wa_masked_en(wal,
-                            GEN9_ROW_CHICKEN4,
-                            GEN12_DISABLE_GRF_CLEAR);
+               wa_mcr_masked_en(wal,
+                                GEN9_ROW_CHICKEN4,
+                                GEN12_DISABLE_GRF_CLEAR);
 
                /* Wa_14010670810:xehpsdv */
-               wa_write_or(wal, XEHP_L3NODEARBCFG, XEHP_LNESPARE);
+               wa_mcr_write_or(wal, XEHP_L3NODEARBCFG, XEHP_LNESPARE);
 
                /* Wa_14010449647:xehpsdv */
-               wa_masked_en(wal, GEN7_HALF_SLICE_CHICKEN1,
-                            GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE);
+               wa_mcr_masked_en(wal, GEN8_HALF_SLICE_CHICKEN1,
+                                GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE);
 
                /* Wa_18011725039:xehpsdv */
                if (IS_XEHPSDV_GRAPHICS_STEP(i915, STEP_A1, STEP_B0)) {
-                       wa_masked_dis(wal, MLTICTXCTL, TDONRENDER);
-                       wa_write_or(wal, L3SQCREG1_CCS0, FLUSHALLNONCOH);
+                       wa_mcr_masked_dis(wal, MLTICTXCTL, TDONRENDER);
+                       wa_mcr_write_or(wal, L3SQCREG1_CCS0, FLUSHALLNONCOH);
                }
 
                /* Wa_14012362059:xehpsdv */
-               wa_write_or(wal, GEN12_MERT_MOD_CTRL, FORCE_MISS_FTLB);
+               wa_mcr_write_or(wal, XEHP_MERT_MOD_CTRL, FORCE_MISS_FTLB);
 
                /* Wa_14014368820:xehpsdv */
                wa_write_or(wal, GEN12_GAMCNTRL_CTRL, INVALIDATION_BROADCAST_MODE_DIS |
@@ -2760,19 +2955,30 @@ general_render_compute_wa_init(struct intel_engine_cs *engine, struct i915_wa_li
 
        if (IS_DG2(i915) || IS_PONTEVECCHIO(i915)) {
                /* Wa_14015227452:dg2,pvc */
-               wa_masked_en(wal, GEN9_ROW_CHICKEN4, XEHP_DIS_BBL_SYSPIPE);
+               wa_mcr_masked_en(wal, GEN9_ROW_CHICKEN4, XEHP_DIS_BBL_SYSPIPE);
 
                /* Wa_22014226127:dg2,pvc */
-               wa_write_or(wal, LSC_CHICKEN_BIT_0, DISABLE_D8_D16_COASLESCE);
+               wa_mcr_write_or(wal, LSC_CHICKEN_BIT_0, DISABLE_D8_D16_COASLESCE);
 
                /* Wa_16015675438:dg2,pvc */
                wa_masked_en(wal, FF_SLICE_CS_CHICKEN2, GEN12_PERF_FIX_BALANCING_CFE_DISABLE);
 
                /* Wa_18018781329:dg2,pvc */
-               wa_write_or(wal, RENDER_MOD_CTRL, FORCE_MISS_FTLB);
-               wa_write_or(wal, COMP_MOD_CTRL, FORCE_MISS_FTLB);
-               wa_write_or(wal, VDBX_MOD_CTRL, FORCE_MISS_FTLB);
-               wa_write_or(wal, VEBX_MOD_CTRL, FORCE_MISS_FTLB);
+               wa_mcr_write_or(wal, RENDER_MOD_CTRL, FORCE_MISS_FTLB);
+               wa_mcr_write_or(wal, COMP_MOD_CTRL, FORCE_MISS_FTLB);
+               wa_mcr_write_or(wal, VDBX_MOD_CTRL, FORCE_MISS_FTLB);
+               wa_mcr_write_or(wal, VEBX_MOD_CTRL, FORCE_MISS_FTLB);
+       }
+
+       if (IS_DG2(i915)) {
+               /*
+                * Wa_16011620976:dg2_g11
+                * Wa_22015475538:dg2
+                */
+               wa_mcr_write_or(wal, LSC_CHICKEN_BIT_0_UDW, DIS_CHAIN_2XSIMD8);
+
+               /* Wa_18017747507:dg2 */
+               wa_masked_en(wal, VFG_PREEMPTION_CHICKEN, POLYGON_TRIFAN_LINELOOP_DISABLE);
        }
 }