1 // SPDX-License-Identifier: MIT
2 /*
3  * Copyright © 2014-2018 Intel Corporation
4  */
5
6 #include "i915_drv.h"
7 #include "i915_reg.h"
8 #include "intel_context.h"
9 #include "intel_engine_pm.h"
10 #include "intel_engine_regs.h"
11 #include "intel_gpu_commands.h"
12 #include "intel_gt.h"
13 #include "intel_gt_mcr.h"
14 #include "intel_gt_regs.h"
15 #include "intel_ring.h"
16 #include "intel_workarounds.h"
17
18 /**
19  * DOC: Hardware workarounds
20  *
21  * Hardware workarounds are register programming documented to be executed in
22  * the driver that fall outside of the normal programming sequences for a
23  * platform. There are some basic categories of workarounds, depending on
24  * how/when they are applied:
25  *
26  * - Context workarounds: workarounds that touch registers that are
27  *   saved/restored to/from the HW context image. The list is emitted (via Load
28  *   Register Immediate commands) once when initializing the device and saved in
29  *   the default context. That default context is then used on every context
30  *   creation to have a "primed golden context", i.e. a context image that
31  *   already contains the changes needed to all the registers.
32  *
33  *   Context workarounds should be implemented in the \*_ctx_workarounds_init()
34  *   variants respective to the targeted platforms.
35  *
36  * - Engine workarounds: the list of these WAs is applied whenever the specific
37  *   engine is reset. It's also possible that a set of engine classes share a
38  *   common power domain and they are reset together. This happens on some
39  *   platforms with render and compute engines. In this case (at least) one of
40  * them needs to keep the workaround programming: the approach taken in the
41  *   driver is to tie those workarounds to the first compute/render engine that
42  *   is registered.  When executing with GuC submission, engine resets are
43  * outside of kernel driver control, hence the list of registers involved is
44  * written once, on engine initialization, and then passed to the GuC, which
45  *   saves/restores their values before/after the reset takes place. See
46  *   ``drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c`` for reference.
47  *
48  *   Workarounds for registers specific to RCS and CCS should be implemented in
49  *   rcs_engine_wa_init() and ccs_engine_wa_init(), respectively; those for
50  *   registers belonging to BCS, VCS or VECS should be implemented in
51  *   xcs_engine_wa_init(). Workarounds for registers not belonging to a specific
52  *   engine's MMIO range but that are part of the common RCS/CCS reset domain
53  *   should be implemented in general_render_compute_wa_init().
54  *
55  * - GT workarounds: the list of these WAs is applied whenever these registers
56  *   revert to their default values: on GPU reset, suspend/resume [1]_, etc.
57  *
58  *   GT workarounds should be implemented in the \*_gt_workarounds_init()
59  *   variants respective to the targeted platforms.
60  *
61  * - Register whitelist: some workarounds need to be implemented in userspace,
62  *   but need to touch privileged registers. The whitelist in the kernel
63  *   instructs the hardware to allow the access to happen. From the kernel side,
64  *   this is just a special case of an MMIO workaround (as we write the list of
65  *   these to-be-whitelisted registers to some special HW registers).
66  *
67  *   Register whitelisting should be done in the \*_whitelist_build() variants
68  *   respective to the targeted platforms.
69  *
70  * - Workaround batchbuffers: buffers that get executed automatically by the
71  *   hardware on every HW context restore. These buffers are created and
72  *   programmed in the default context so the hardware always goes through those
73  *   programming sequences when switching contexts. The support for workaround
74  *   batchbuffers is enabled via these hardware mechanisms:
75  *
76  *   #. INDIRECT_CTX: A batchbuffer and an offset are provided in the default
77  *      context, pointing the hardware to jump to that location when that offset
78  *      is reached in the context restore. The workaround batchbuffer in the driver
79  *      currently uses this mechanism for all platforms.
80  *
81  *   #. BB_PER_CTX_PTR: A batchbuffer is provided in the default context,
82  *      pointing the hardware to a buffer to continue executing after the
83  *      engine registers are restored in a context restore sequence. This is
84  *      currently not used in the driver.
85  *
86  * - Other:  There are WAs that, due to their nature, cannot be applied from a
87  *   central place. Those are peppered around the rest of the code, as needed.
88  *   Workarounds related to the display IP are the main example.
89  *
90  * .. [1] Technically, some registers are power-context saved & restored, so they
91  *    survive a suspend/resume. In practice, writing them again is not too
92  *    costly and simplifies things, so it's the approach taken in the driver.
93  */
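/*
 * A rough sketch of how the context workaround list described above reaches
 * the default context image: intel_engine_emit_ctx_wa() further below emits
 * it as a single Load Register Immediate packet of the form
 *
 *	MI_LOAD_REGISTER_IMM(count)
 *	    <register offset 0> <value 0>
 *	    ...
 *	    <register offset count-1> <value count-1>
 *	MI_NOOP
 *
 * so every context created from that "golden" image inherits the programmed
 * values.
 */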
94
95 static void wa_init_start(struct i915_wa_list *wal, struct intel_gt *gt,
96                           const char *name, const char *engine_name)
97 {
98         wal->gt = gt;
99         wal->name = name;
100         wal->engine_name = engine_name;
101 }
102
103 #define WA_LIST_CHUNK (1 << 4)
104
105 static void wa_init_finish(struct i915_wa_list *wal)
106 {
107         /* Trim unused entries. */
108         if (!IS_ALIGNED(wal->count, WA_LIST_CHUNK)) {
109                 struct i915_wa *list = kmemdup(wal->list,
110                                                wal->count * sizeof(*list),
111                                                GFP_KERNEL);
112
113                 if (list) {
114                         kfree(wal->list);
115                         wal->list = list;
116                 }
117         }
118
119         if (!wal->count)
120                 return;
121
122         drm_dbg(&wal->gt->i915->drm, "Initialized %u %s workarounds on %s\n",
123                 wal->wa_count, wal->name, wal->engine_name);
124 }
125
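/*
 * A short summary of the insertion logic below: the list is kept sorted by
 * register offset. Adding an entry for a register that is already present
 * merges the clr/set/read masks into the existing entry; if the new entry
 * clears bits that an earlier entry had set, those bits are dropped and a
 * "Discarding overwritten w/a" error is logged.
 */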
126 static void _wa_add(struct i915_wa_list *wal, const struct i915_wa *wa)
127 {
128         unsigned int addr = i915_mmio_reg_offset(wa->reg);
129         struct drm_i915_private *i915 = wal->gt->i915;
130         unsigned int start = 0, end = wal->count;
131         const unsigned int grow = WA_LIST_CHUNK;
132         struct i915_wa *wa_;
133
134         GEM_BUG_ON(!is_power_of_2(grow));
135
136         if (IS_ALIGNED(wal->count, grow)) { /* Either uninitialized or full. */
137                 struct i915_wa *list;
138
139                 list = kmalloc_array(ALIGN(wal->count + 1, grow), sizeof(*wa),
140                                      GFP_KERNEL);
141                 if (!list) {
142                         drm_err(&i915->drm, "No space for workaround init!\n");
143                         return;
144                 }
145
146                 if (wal->list) {
147                         memcpy(list, wal->list, sizeof(*wa) * wal->count);
148                         kfree(wal->list);
149                 }
150
151                 wal->list = list;
152         }
153
154         while (start < end) {
155                 unsigned int mid = start + (end - start) / 2;
156
157                 if (i915_mmio_reg_offset(wal->list[mid].reg) < addr) {
158                         start = mid + 1;
159                 } else if (i915_mmio_reg_offset(wal->list[mid].reg) > addr) {
160                         end = mid;
161                 } else {
162                         wa_ = &wal->list[mid];
163
164                         if ((wa->clr | wa_->clr) && !(wa->clr & ~wa_->clr)) {
165                                 drm_err(&i915->drm,
166                                         "Discarding overwritten w/a for reg %04x (clear: %08x, set: %08x)\n",
167                                         i915_mmio_reg_offset(wa_->reg),
168                                         wa_->clr, wa_->set);
169
170                                 wa_->set &= ~wa->clr;
171                         }
172
173                         wal->wa_count++;
174                         wa_->set |= wa->set;
175                         wa_->clr |= wa->clr;
176                         wa_->read |= wa->read;
177                         return;
178                 }
179         }
180
181         wal->wa_count++;
182         wa_ = &wal->list[wal->count++];
183         *wa_ = *wa;
184
185         while (wa_-- > wal->list) {
186                 GEM_BUG_ON(i915_mmio_reg_offset(wa_[0].reg) ==
187                            i915_mmio_reg_offset(wa_[1].reg));
188                 if (i915_mmio_reg_offset(wa_[1].reg) >
189                     i915_mmio_reg_offset(wa_[0].reg))
190                         break;
191
192                 swap(wa_[1], wa_[0]);
193         }
194 }
195
196 static void wa_add(struct i915_wa_list *wal, i915_reg_t reg,
197                    u32 clear, u32 set, u32 read_mask, bool masked_reg)
198 {
199         struct i915_wa wa = {
200                 .reg  = reg,
201                 .clr  = clear,
202                 .set  = set,
203                 .read = read_mask,
204                 .masked_reg = masked_reg,
205         };
206
207         _wa_add(wal, &wa);
208 }
209
210 static void wa_mcr_add(struct i915_wa_list *wal, i915_mcr_reg_t reg,
211                        u32 clear, u32 set, u32 read_mask, bool masked_reg)
212 {
213         struct i915_wa wa = {
214                 .mcr_reg = reg,
215                 .clr  = clear,
216                 .set  = set,
217                 .read = read_mask,
218                 .masked_reg = masked_reg,
219                 .is_mcr = 1,
220         };
221
222         _wa_add(wal, &wa);
223 }
224
225 static void
226 wa_write_clr_set(struct i915_wa_list *wal, i915_reg_t reg, u32 clear, u32 set)
227 {
228         wa_add(wal, reg, clear, set, clear, false);
229 }
230
231 static void
232 wa_mcr_write_clr_set(struct i915_wa_list *wal, i915_mcr_reg_t reg, u32 clear, u32 set)
233 {
234         wa_mcr_add(wal, reg, clear, set, clear, false);
235 }
236
237 static void
238 wa_write(struct i915_wa_list *wal, i915_reg_t reg, u32 set)
239 {
240         wa_write_clr_set(wal, reg, ~0, set);
241 }
242
243 static void
244 wa_mcr_write(struct i915_wa_list *wal, i915_mcr_reg_t reg, u32 set)
245 {
246         wa_mcr_write_clr_set(wal, reg, ~0, set);
247 }
248
249 static void
250 wa_write_or(struct i915_wa_list *wal, i915_reg_t reg, u32 set)
251 {
252         wa_write_clr_set(wal, reg, set, set);
253 }
254
255 static void
256 wa_mcr_write_or(struct i915_wa_list *wal, i915_mcr_reg_t reg, u32 set)
257 {
258         wa_mcr_write_clr_set(wal, reg, set, set);
259 }
260
261 static void
262 wa_write_clr(struct i915_wa_list *wal, i915_reg_t reg, u32 clr)
263 {
264         wa_write_clr_set(wal, reg, clr, 0);
265 }
266
267 static void
268 wa_mcr_write_clr(struct i915_wa_list *wal, i915_mcr_reg_t reg, u32 clr)
269 {
270         wa_mcr_write_clr_set(wal, reg, clr, 0);
271 }
272
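/*
 * In terms of wa_write_clr_set(), the wrappers above reduce to: wa_write()
 * replaces the whole register (clear = ~0), wa_write_or() only sets bits
 * (clear = set, i.e. a read-modify-write), and wa_write_clr() only clears
 * bits (set = 0). In all cases the clear mask doubles as the read-back
 * verification mask.
 */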
273 /*
274  * WA operations on "masked register". A masked register has the upper 16 bits
275  * documented as "masked" in b-spec. Its purpose is to allow writing to just a
276  * portion of the register without an RMW: you simply write in the upper 16 bits
277  * the mask of bits you are going to modify.
278  *
279  * The wa_masked_* family of functions already does the necessary operations to
280  * calculate the mask based on the parameters passed, so the user only has to
281  * provide the lower 16 bits of that register.
282  */
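/*
 * A small worked example, assuming the usual _MASKED_BIT_ENABLE() expansion
 * of ((bit) << 16 | (bit)):
 *
 *	wa_masked_en(wal, CACHE_MODE_1, BIT(6));
 *
 * records a single write of 0x00400040: the upper 16 bits select which bit
 * the write affects and the lower 16 bits carry its new value. A matching
 * wa_masked_dis() would record 0x00400000 to clear that same bit.
 */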
283
284 static void
285 wa_masked_en(struct i915_wa_list *wal, i915_reg_t reg, u32 val)
286 {
287         wa_add(wal, reg, 0, _MASKED_BIT_ENABLE(val), val, true);
288 }
289
290 static void
291 wa_mcr_masked_en(struct i915_wa_list *wal, i915_mcr_reg_t reg, u32 val)
292 {
293         wa_mcr_add(wal, reg, 0, _MASKED_BIT_ENABLE(val), val, true);
294 }
295
296 static void
297 wa_masked_dis(struct i915_wa_list *wal, i915_reg_t reg, u32 val)
298 {
299         wa_add(wal, reg, 0, _MASKED_BIT_DISABLE(val), val, true);
300 }
301
302 static void
303 wa_mcr_masked_dis(struct i915_wa_list *wal, i915_mcr_reg_t reg, u32 val)
304 {
305         wa_mcr_add(wal, reg, 0, _MASKED_BIT_DISABLE(val), val, true);
306 }
307
308 static void
309 wa_masked_field_set(struct i915_wa_list *wal, i915_reg_t reg,
310                     u32 mask, u32 val)
311 {
312         wa_add(wal, reg, 0, _MASKED_FIELD(mask, val), mask, true);
313 }
314
315 static void
316 wa_mcr_masked_field_set(struct i915_wa_list *wal, i915_mcr_reg_t reg,
317                         u32 mask, u32 val)
318 {
319         wa_mcr_add(wal, reg, 0, _MASKED_FIELD(mask, val), mask, true);
320 }
321
322 static void gen6_ctx_workarounds_init(struct intel_engine_cs *engine,
323                                       struct i915_wa_list *wal)
324 {
325         wa_masked_en(wal, INSTPM, INSTPM_FORCE_ORDERING);
326 }
327
328 static void gen7_ctx_workarounds_init(struct intel_engine_cs *engine,
329                                       struct i915_wa_list *wal)
330 {
331         wa_masked_en(wal, INSTPM, INSTPM_FORCE_ORDERING);
332 }
333
334 static void gen8_ctx_workarounds_init(struct intel_engine_cs *engine,
335                                       struct i915_wa_list *wal)
336 {
337         wa_masked_en(wal, INSTPM, INSTPM_FORCE_ORDERING);
338
339         /* WaDisableAsyncFlipPerfMode:bdw,chv */
340         wa_masked_en(wal, RING_MI_MODE(RENDER_RING_BASE), ASYNC_FLIP_PERF_DISABLE);
341
342         /* WaDisablePartialInstShootdown:bdw,chv */
343         wa_mcr_masked_en(wal, GEN8_ROW_CHICKEN,
344                          PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE);
345
346         /* Use Force Non-Coherent whenever executing a 3D context. This is a
347          * workaround for a possible hang in the unlikely event a TLB
348          * invalidation occurs during a PSD flush.
349          */
350         /* WaForceEnableNonCoherent:bdw,chv */
351         /* WaHdcDisableFetchWhenMasked:bdw,chv */
352         wa_masked_en(wal, HDC_CHICKEN0,
353                      HDC_DONOT_FETCH_MEM_WHEN_MASKED |
354                      HDC_FORCE_NON_COHERENT);
355
356         /* From the Haswell PRM, Command Reference: Registers, CACHE_MODE_0:
357          * "The Hierarchical Z RAW Stall Optimization allows non-overlapping
358          *  polygons in the same 8x4 pixel/sample area to be processed without
359          *  stalling waiting for the earlier ones to write to Hierarchical Z
360          *  buffer."
361          *
362          * This optimization is off by default for BDW and CHV; turn it on.
363          */
364         wa_masked_dis(wal, CACHE_MODE_0_GEN7, HIZ_RAW_STALL_OPT_DISABLE);
365
366         /* Wa4x4STCOptimizationDisable:bdw,chv */
367         wa_masked_en(wal, CACHE_MODE_1, GEN8_4x4_STC_OPTIMIZATION_DISABLE);
368
369         /*
370          * BSpec recommends 8x4 when MSAA is used,
371          * however in practice 16x4 seems fastest.
372          *
373          * Note that PS/WM thread counts depend on the WIZ hashing
374          * disable bit, which we don't touch here, but it's good
375          * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
376          */
377         wa_masked_field_set(wal, GEN7_GT_MODE,
378                             GEN6_WIZ_HASHING_MASK,
379                             GEN6_WIZ_HASHING_16x4);
380 }
381
382 static void bdw_ctx_workarounds_init(struct intel_engine_cs *engine,
383                                      struct i915_wa_list *wal)
384 {
385         struct drm_i915_private *i915 = engine->i915;
386
387         gen8_ctx_workarounds_init(engine, wal);
388
389         /* WaDisableThreadStallDopClockGating:bdw (pre-production) */
390         wa_mcr_masked_en(wal, GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE);
391
392         /* WaDisableDopClockGating:bdw
393          *
394          * Also see the related UCGTCL1 write in bdw_init_clock_gating()
395          * to disable EUTC clock gating.
396          */
397         wa_mcr_masked_en(wal, GEN8_ROW_CHICKEN2,
398                          DOP_CLOCK_GATING_DISABLE);
399
400         wa_mcr_masked_en(wal, GEN8_HALF_SLICE_CHICKEN3,
401                          GEN8_SAMPLER_POWER_BYPASS_DIS);
402
403         wa_masked_en(wal, HDC_CHICKEN0,
404                      /* WaForceContextSaveRestoreNonCoherent:bdw */
405                      HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT |
406                      /* WaDisableFenceDestinationToSLM:bdw (pre-prod) */
407                      (IS_BDW_GT3(i915) ? HDC_FENCE_DEST_SLM_DISABLE : 0));
408 }
409
410 static void chv_ctx_workarounds_init(struct intel_engine_cs *engine,
411                                      struct i915_wa_list *wal)
412 {
413         gen8_ctx_workarounds_init(engine, wal);
414
415         /* WaDisableThreadStallDopClockGating:chv */
416         wa_mcr_masked_en(wal, GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE);
417
418         /* Improve HiZ throughput on CHV. */
419         wa_masked_en(wal, HIZ_CHICKEN, CHV_HZ_8X8_MODE_IN_1X);
420 }
421
422 static void gen9_ctx_workarounds_init(struct intel_engine_cs *engine,
423                                       struct i915_wa_list *wal)
424 {
425         struct drm_i915_private *i915 = engine->i915;
426
427         if (HAS_LLC(i915)) {
428                 /* WaCompressedResourceSamplerPbeMediaNewHashMode:skl,kbl
429                  *
430                  * Must match Display Engine. See
431                  * WaCompressedResourceDisplayNewHashMode.
432                  */
433                 wa_masked_en(wal, COMMON_SLICE_CHICKEN2,
434                              GEN9_PBE_COMPRESSED_HASH_SELECTION);
435                 wa_mcr_masked_en(wal, GEN9_HALF_SLICE_CHICKEN7,
436                                  GEN9_SAMPLER_HASH_COMPRESSED_READ_ADDR);
437         }
438
439         /* WaClearFlowControlGpgpuContextSave:skl,bxt,kbl,glk,cfl */
440         /* WaDisablePartialInstShootdown:skl,bxt,kbl,glk,cfl */
441         wa_mcr_masked_en(wal, GEN8_ROW_CHICKEN,
442                          FLOW_CONTROL_ENABLE |
443                          PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE);
444
445         /* WaEnableYV12BugFixInHalfSliceChicken7:skl,bxt,kbl,glk,cfl */
446         /* WaEnableSamplerGPGPUPreemptionSupport:skl,bxt,kbl,cfl */
447         wa_mcr_masked_en(wal, GEN9_HALF_SLICE_CHICKEN7,
448                          GEN9_ENABLE_YV12_BUGFIX |
449                          GEN9_ENABLE_GPGPU_PREEMPTION);
450
451         /* Wa4x4STCOptimizationDisable:skl,bxt,kbl,glk,cfl */
452         /* WaDisablePartialResolveInVc:skl,bxt,kbl,cfl */
453         wa_masked_en(wal, CACHE_MODE_1,
454                      GEN8_4x4_STC_OPTIMIZATION_DISABLE |
455                      GEN9_PARTIAL_RESOLVE_IN_VC_DISABLE);
456
457         /* WaCcsTlbPrefetchDisable:skl,bxt,kbl,glk,cfl */
458         wa_mcr_masked_dis(wal, GEN9_HALF_SLICE_CHICKEN5,
459                           GEN9_CCS_TLB_PREFETCH_ENABLE);
460
461         /* WaForceContextSaveRestoreNonCoherent:skl,bxt,kbl,cfl */
462         wa_masked_en(wal, HDC_CHICKEN0,
463                      HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT |
464                      HDC_FORCE_CSR_NON_COHERENT_OVR_DISABLE);
465
466         /* WaForceEnableNonCoherent and WaDisableHDCInvalidation are
467          * both tied to WaForceContextSaveRestoreNonCoherent
468          * in some hsds for skl. We keep the tie for all gen9. The
469          * documentation is a bit hazy and so we want to get common behaviour,
470          * even though there is no clear evidence we would need both on kbl/bxt.
471          * This area has been source of system hangs so we play it safe
472          * and mimic the skl regardless of what bspec says.
473          *
474          * Use Force Non-Coherent whenever executing a 3D context. This
475          * is a workaround for a possible hang in the unlikely event
476          * a TLB invalidation occurs during a PSD flush.
477          */
478
479         /* WaForceEnableNonCoherent:skl,bxt,kbl,cfl */
480         wa_masked_en(wal, HDC_CHICKEN0,
481                      HDC_FORCE_NON_COHERENT);
482
483         /* WaDisableSamplerPowerBypassForSOPingPong:skl,bxt,kbl,cfl */
484         if (IS_SKYLAKE(i915) ||
485             IS_KABYLAKE(i915) ||
486             IS_COFFEELAKE(i915) ||
487             IS_COMETLAKE(i915))
488                 wa_mcr_masked_en(wal, GEN8_HALF_SLICE_CHICKEN3,
489                                  GEN8_SAMPLER_POWER_BYPASS_DIS);
490
491         /* WaDisableSTUnitPowerOptimization:skl,bxt,kbl,glk,cfl */
492         wa_mcr_masked_en(wal, HALF_SLICE_CHICKEN2, GEN8_ST_PO_DISABLE);
493
494         /*
495          * Supporting preemption with fine-granularity requires changes in the
496          * batch buffer programming. Since we can't break old userspace, we
497          * need to set our default preemption level to safe value. Userspace is
498          * still able to use more fine-grained preemption levels, since in
499          * WaEnablePreemptionGranularityControlByUMD we're whitelisting the
500          * per-ctx register. As such, WaDisable{3D,GPGPU}MidCmdPreemption are
501          * not real HW workarounds, but merely a way to start using preemption
502          * while maintaining old contract with userspace.
503          */
504
505         /* WaDisable3DMidCmdPreemption:skl,bxt,glk,cfl,[cnl] */
506         wa_masked_dis(wal, GEN8_CS_CHICKEN1, GEN9_PREEMPT_3D_OBJECT_LEVEL);
507
508         /* WaDisableGPGPUMidCmdPreemption:skl,bxt,glk,cfl,[cnl] */
509         wa_masked_field_set(wal, GEN8_CS_CHICKEN1,
510                             GEN9_PREEMPT_GPGPU_LEVEL_MASK,
511                             GEN9_PREEMPT_GPGPU_COMMAND_LEVEL);
512
513         /* WaClearHIZ_WM_CHICKEN3:bxt,glk */
514         if (IS_GEN9_LP(i915))
515                 wa_masked_en(wal, GEN9_WM_CHICKEN3, GEN9_FACTOR_IN_CLR_VAL_HIZ);
516 }
517
518 static void skl_tune_iz_hashing(struct intel_engine_cs *engine,
519                                 struct i915_wa_list *wal)
520 {
521         struct intel_gt *gt = engine->gt;
522         u8 vals[3] = { 0, 0, 0 };
523         unsigned int i;
524
525         for (i = 0; i < 3; i++) {
526                 u8 ss;
527
528                 /*
529                  * Only consider slices where one, and only one, subslice has 7
530                  * EUs
531                  */
532                 if (!is_power_of_2(gt->info.sseu.subslice_7eu[i]))
533                         continue;
534
535                 /*
536                  * subslice_7eu[i] != 0 (because of the check above) and
537                  * ss_max == 4 (maximum number of subslices possible per slice)
538                  *
539                  * ->    0 <= ss <= 3;
540                  */
541                 ss = ffs(gt->info.sseu.subslice_7eu[i]) - 1;
542                 vals[i] = 3 - ss;
543         }
544
545         if (vals[0] == 0 && vals[1] == 0 && vals[2] == 0)
546                 return;
547
548         /* Tune IZ hashing. See intel_device_info_runtime_init() */
549         wa_masked_field_set(wal, GEN7_GT_MODE,
550                             GEN9_IZ_HASHING_MASK(2) |
551                             GEN9_IZ_HASHING_MASK(1) |
552                             GEN9_IZ_HASHING_MASK(0),
553                             GEN9_IZ_HASHING(2, vals[2]) |
554                             GEN9_IZ_HASHING(1, vals[1]) |
555                             GEN9_IZ_HASHING(0, vals[0]));
556 }
557
558 static void skl_ctx_workarounds_init(struct intel_engine_cs *engine,
559                                      struct i915_wa_list *wal)
560 {
561         gen9_ctx_workarounds_init(engine, wal);
562         skl_tune_iz_hashing(engine, wal);
563 }
564
565 static void bxt_ctx_workarounds_init(struct intel_engine_cs *engine,
566                                      struct i915_wa_list *wal)
567 {
568         gen9_ctx_workarounds_init(engine, wal);
569
570         /* WaDisableThreadStallDopClockGating:bxt */
571         wa_mcr_masked_en(wal, GEN8_ROW_CHICKEN,
572                          STALL_DOP_GATING_DISABLE);
573
574         /* WaToEnableHwFixForPushConstHWBug:bxt */
575         wa_masked_en(wal, COMMON_SLICE_CHICKEN2,
576                      GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
577 }
578
579 static void kbl_ctx_workarounds_init(struct intel_engine_cs *engine,
580                                      struct i915_wa_list *wal)
581 {
582         struct drm_i915_private *i915 = engine->i915;
583
584         gen9_ctx_workarounds_init(engine, wal);
585
586         /* WaToEnableHwFixForPushConstHWBug:kbl */
587         if (IS_KBL_GRAPHICS_STEP(i915, STEP_C0, STEP_FOREVER))
588                 wa_masked_en(wal, COMMON_SLICE_CHICKEN2,
589                              GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
590
591         /* WaDisableSbeCacheDispatchPortSharing:kbl */
592         wa_mcr_masked_en(wal, GEN8_HALF_SLICE_CHICKEN1,
593                          GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE);
594 }
595
596 static void glk_ctx_workarounds_init(struct intel_engine_cs *engine,
597                                      struct i915_wa_list *wal)
598 {
599         gen9_ctx_workarounds_init(engine, wal);
600
601         /* WaToEnableHwFixForPushConstHWBug:glk */
602         wa_masked_en(wal, COMMON_SLICE_CHICKEN2,
603                      GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
604 }
605
606 static void cfl_ctx_workarounds_init(struct intel_engine_cs *engine,
607                                      struct i915_wa_list *wal)
608 {
609         gen9_ctx_workarounds_init(engine, wal);
610
611         /* WaToEnableHwFixForPushConstHWBug:cfl */
612         wa_masked_en(wal, COMMON_SLICE_CHICKEN2,
613                      GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
614
615         /* WaDisableSbeCacheDispatchPortSharing:cfl */
616         wa_mcr_masked_en(wal, GEN8_HALF_SLICE_CHICKEN1,
617                          GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE);
618 }
619
620 static void icl_ctx_workarounds_init(struct intel_engine_cs *engine,
621                                      struct i915_wa_list *wal)
622 {
623         /* Wa_1406697149 (WaDisableBankHangMode:icl) */
624         wa_write(wal,
625                  GEN8_L3CNTLREG,
626                  intel_uncore_read(engine->uncore, GEN8_L3CNTLREG) |
627                  GEN8_ERRDETBCTRL);
628
629         /* WaForceEnableNonCoherent:icl
630          * This is not the same workaround as in early Gen9 platforms, where
631          * lacking this could cause system hangs, but coherency performance
632          * overhead is high and only a few compute workloads really need it
633          * (the register is whitelisted in hardware now, so UMDs can opt in
634          * for coherency if they have a good reason).
635          */
636         wa_mcr_masked_en(wal, ICL_HDC_MODE, HDC_FORCE_NON_COHERENT);
637
638         /* WaEnableFloatBlendOptimization:icl */
639         wa_mcr_add(wal, GEN10_CACHE_MODE_SS, 0,
640                    _MASKED_BIT_ENABLE(FLOAT_BLEND_OPTIMIZATION_ENABLE),
641                    0 /* write-only, so skip validation */,
642                    true);
643
644         /* WaDisableGPGPUMidThreadPreemption:icl */
645         wa_masked_field_set(wal, GEN8_CS_CHICKEN1,
646                             GEN9_PREEMPT_GPGPU_LEVEL_MASK,
647                             GEN9_PREEMPT_GPGPU_THREAD_GROUP_LEVEL);
648
649         /* allow headerless messages for preemptible GPGPU context */
650         wa_mcr_masked_en(wal, GEN10_SAMPLER_MODE,
651                          GEN11_SAMPLER_ENABLE_HEADLESS_MSG);
652
653         /* Wa_1604278689:icl,ehl */
654         wa_write(wal, IVB_FBC_RT_BASE, 0xFFFFFFFF & ~ILK_FBC_RT_VALID);
655         wa_write_clr_set(wal, IVB_FBC_RT_BASE_UPPER,
656                          0, /* write-only register; skip validation */
657                          0xFFFFFFFF);
658
659         /* Wa_1406306137:icl,ehl */
660         wa_mcr_masked_en(wal, GEN9_ROW_CHICKEN4, GEN11_DIS_PICK_2ND_EU);
661 }
662
663 /*
664  * These settings aren't actually workarounds, but general tuning settings that
665  * need to be programmed on dg2 platform.
666  */
667 static void dg2_ctx_gt_tuning_init(struct intel_engine_cs *engine,
668                                    struct i915_wa_list *wal)
669 {
670         wa_mcr_masked_en(wal, CHICKEN_RASTER_2, TBIMR_FAST_CLIP);
671         wa_mcr_write_clr_set(wal, XEHP_L3SQCREG5, L3_PWM_TIMER_INIT_VAL_MASK,
672                              REG_FIELD_PREP(L3_PWM_TIMER_INIT_VAL_MASK, 0x7f));
673         wa_mcr_add(wal,
674                    XEHP_FF_MODE2,
675                    FF_MODE2_TDS_TIMER_MASK,
676                    FF_MODE2_TDS_TIMER_128,
677                    0, false);
678 }
679
680 /*
681  * These settings aren't actually workarounds, but general tuning settings that
682  * need to be programmed on several platforms.
683  */
684 static void gen12_ctx_gt_tuning_init(struct intel_engine_cs *engine,
685                                      struct i915_wa_list *wal)
686 {
687         /*
688          * Although some platforms refer to it as Wa_1604555607, we need to
689          * program it even on those that don't explicitly list that
690          * workaround.
691          *
692          * Note that the programming of this register is further modified
693          * according to the FF_MODE2 guidance given by Wa_1608008084:gen12.
694          * Wa_1608008084 tells us the FF_MODE2 register will return the wrong
695          * value when read. The default value for this register is zero for all
696          * fields and there are no bit masks. So instead of doing a RMW we
697          * should just write TDS timer value. For the same reason read
698          * verification is ignored.
699          */
700         wa_add(wal,
701                GEN12_FF_MODE2,
702                FF_MODE2_TDS_TIMER_MASK,
703                FF_MODE2_TDS_TIMER_128,
704                0, false);
705 }
706
707 static void gen12_ctx_workarounds_init(struct intel_engine_cs *engine,
708                                        struct i915_wa_list *wal)
709 {
710         struct drm_i915_private *i915 = engine->i915;
711
712         gen12_ctx_gt_tuning_init(engine, wal);
713
714         /*
715          * Wa_1409142259:tgl,dg1,adl-p
716          * Wa_1409347922:tgl,dg1,adl-p
717          * Wa_1409252684:tgl,dg1,adl-p
718          * Wa_1409217633:tgl,dg1,adl-p
719          * Wa_1409207793:tgl,dg1,adl-p
720          * Wa_1409178076:tgl,dg1,adl-p
721          * Wa_1408979724:tgl,dg1,adl-p
722          * Wa_14010443199:tgl,rkl,dg1,adl-p
723          * Wa_14010698770:tgl,rkl,dg1,adl-s,adl-p
724          * Wa_1409342910:tgl,rkl,dg1,adl-s,adl-p
725          */
726         wa_masked_en(wal, GEN11_COMMON_SLICE_CHICKEN3,
727                      GEN12_DISABLE_CPS_AWARE_COLOR_PIPE);
728
729         /* WaDisableGPGPUMidThreadPreemption:gen12 */
730         wa_masked_field_set(wal, GEN8_CS_CHICKEN1,
731                             GEN9_PREEMPT_GPGPU_LEVEL_MASK,
732                             GEN9_PREEMPT_GPGPU_THREAD_GROUP_LEVEL);
733
734         /*
735          * Wa_16011163337
736          *
737          * Like in gen12_ctx_gt_tuning_init(), read verification is ignored due
738          * to Wa_1608008084.
739          */
740         wa_add(wal,
741                GEN12_FF_MODE2,
742                FF_MODE2_GS_TIMER_MASK,
743                FF_MODE2_GS_TIMER_224,
744                0, false);
745
746         if (!IS_DG1(i915)) {
747                 /* Wa_1806527549 */
748                 wa_masked_en(wal, HIZ_CHICKEN, HZ_DEPTH_TEST_LE_GE_OPT_DISABLE);
749
750                 /* Wa_1606376872 */
751                 wa_masked_en(wal, COMMON_SLICE_CHICKEN4, DISABLE_TDC_LOAD_BALANCING_CALC);
752         }
753 }
754
755 static void dg1_ctx_workarounds_init(struct intel_engine_cs *engine,
756                                      struct i915_wa_list *wal)
757 {
758         gen12_ctx_workarounds_init(engine, wal);
759
760         /* Wa_1409044764 */
761         wa_masked_dis(wal, GEN11_COMMON_SLICE_CHICKEN3,
762                       DG1_FLOAT_POINT_BLEND_OPT_STRICT_MODE_EN);
763
764         /* Wa_22010493298 */
765         wa_masked_en(wal, HIZ_CHICKEN,
766                      DG1_HZ_READ_SUPPRESSION_OPTIMIZATION_DISABLE);
767 }
768
769 static void dg2_ctx_workarounds_init(struct intel_engine_cs *engine,
770                                      struct i915_wa_list *wal)
771 {
772         dg2_ctx_gt_tuning_init(engine, wal);
773
774         /* Wa_16011186671:dg2_g11 */
775         if (IS_DG2_GRAPHICS_STEP(engine->i915, G11, STEP_A0, STEP_B0)) {
776                 wa_mcr_masked_dis(wal, VFLSKPD, DIS_MULT_MISS_RD_SQUASH);
777                 wa_mcr_masked_en(wal, VFLSKPD, DIS_OVER_FETCH_CACHE);
778         }
779
780         if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_A0, STEP_B0)) {
781                 /* Wa_14010469329:dg2_g10 */
782                 wa_mcr_masked_en(wal, XEHP_COMMON_SLICE_CHICKEN3,
783                                  XEHP_DUAL_SIMD8_SEQ_MERGE_DISABLE);
784
785                 /*
786                  * Wa_22010465075:dg2_g10
787                  * Wa_22010613112:dg2_g10
788                  * Wa_14010698770:dg2_g10
789                  */
790                 wa_mcr_masked_en(wal, XEHP_COMMON_SLICE_CHICKEN3,
791                                  GEN12_DISABLE_CPS_AWARE_COLOR_PIPE);
792         }
793
794         /* Wa_16013271637:dg2 */
795         wa_mcr_masked_en(wal, XEHP_SLICE_COMMON_ECO_CHICKEN1,
796                          MSC_MSAA_REODER_BUF_BYPASS_DISABLE);
797
798         /* Wa_14014947963:dg2 */
799         if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_B0, STEP_FOREVER) ||
800             IS_DG2_G11(engine->i915) || IS_DG2_G12(engine->i915))
801                 wa_masked_field_set(wal, VF_PREEMPTION, PREEMPTION_VERTEX_COUNT, 0x4000);
802
803         /* Wa_18018764978:dg2 */
804         if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_C0, STEP_FOREVER) ||
805             IS_DG2_G11(engine->i915) || IS_DG2_G12(engine->i915))
806                 wa_mcr_masked_en(wal, XEHP_PSS_MODE2, SCOREBOARD_STALL_FLUSH_CONTROL);
807
808         /* Wa_15010599737:dg2 */
809         wa_mcr_masked_en(wal, CHICKEN_RASTER_1, DIS_SF_ROUND_NEAREST_EVEN);
810
811         /* Wa_18019271663:dg2 */
812         wa_masked_en(wal, CACHE_MODE_1, MSAA_OPTIMIZATION_REDUC_DISABLE);
813 }
814
815 static void mtl_ctx_workarounds_init(struct intel_engine_cs *engine,
816                                      struct i915_wa_list *wal)
817 {
818         struct drm_i915_private *i915 = engine->i915;
819
820         if (IS_MTL_GRAPHICS_STEP(i915, M, STEP_A0, STEP_B0) ||
821             IS_MTL_GRAPHICS_STEP(i915, P, STEP_A0, STEP_B0)) {
822                 /* Wa_14014947963 */
823                 wa_masked_field_set(wal, VF_PREEMPTION,
824                                     PREEMPTION_VERTEX_COUNT, 0x4000);
825
826                 /* Wa_16013271637 */
827                 wa_mcr_masked_en(wal, XEHP_SLICE_COMMON_ECO_CHICKEN1,
828                                  MSC_MSAA_REODER_BUF_BYPASS_DISABLE);
829
830                 /* Wa_18019627453 */
831                 wa_mcr_masked_en(wal, VFLSKPD, VF_PREFETCH_TLB_DIS);
832
833                 /* Wa_18018764978 */
834                 wa_mcr_masked_en(wal, XEHP_PSS_MODE2, SCOREBOARD_STALL_FLUSH_CONTROL);
835         }
836
837         /* Wa_18019271663 */
838         wa_masked_en(wal, CACHE_MODE_1, MSAA_OPTIMIZATION_REDUC_DISABLE);
839 }
840
841 static void fakewa_disable_nestedbb_mode(struct intel_engine_cs *engine,
842                                          struct i915_wa_list *wal)
843 {
844         /*
845          * This is a "fake" workaround defined by software to ensure we
846          * maintain reliable, backward-compatible behavior for userspace with
847          * regards to how nested MI_BATCH_BUFFER_START commands are handled.
848          *
849          * The per-context setting of MI_MODE[12] determines whether the bits
850          * of a nested MI_BATCH_BUFFER_START instruction should be interpreted
851          * in the traditional manner or whether they should instead use a new
852          * tgl+ meaning that breaks backward compatibility, but allows nesting
853          * into 3rd-level batchbuffers.  When this new capability was first
854          * added in TGL, it remained off by default unless a context
855          * intentionally opted in to the new behavior.  However Xe_HPG now
856          * flips this on by default and requires that we explicitly opt out if
857          * we don't want the new behavior.
858          *
859          * From a SW perspective, we want to maintain the backward-compatible
860          * behavior for userspace, so we'll apply a fake workaround to set it
861          * back to the legacy behavior on platforms where the hardware default
862          * is to break compatibility.  At the moment there is no Linux
863          * userspace that utilizes third-level batchbuffers, so this will avoid
864          * the need for userspace to make any changes.  Using the legacy
865          * meaning is the correct thing to do.  If/when we have userspace
866          * consumers that want to utilize third-level batch nesting, we can
867          * provide a context parameter to allow them to opt-in.
868          */
869         wa_masked_dis(wal, RING_MI_MODE(engine->mmio_base), TGL_NESTED_BB_EN);
870 }
871
872 static void gen12_ctx_gt_mocs_init(struct intel_engine_cs *engine,
873                                    struct i915_wa_list *wal)
874 {
875         u8 mocs;
876
877         /*
878          * Some blitter commands do not have a field for MOCS; those
879          * commands will use the MOCS index pointed to by BLIT_CCTL.
880          * BLIT_CCTL registers need to be programmed to un-cached.
881          */
882         if (engine->class == COPY_ENGINE_CLASS) {
883                 mocs = engine->gt->mocs.uc_index;
884                 wa_write_clr_set(wal,
885                                  BLIT_CCTL(engine->mmio_base),
886                                  BLIT_CCTL_MASK,
887                                  BLIT_CCTL_MOCS(mocs, mocs));
888         }
889 }
890
891 /*
892  * gen12_ctx_gt_fake_wa_init() doesn't program an official workaround
893  * defined by the hardware team, but it programs general context registers.
894  * Adding that context register programming to the context workaround list
895  * allows us to use the wa framework for proper application and validation.
896  */
897 static void
898 gen12_ctx_gt_fake_wa_init(struct intel_engine_cs *engine,
899                           struct i915_wa_list *wal)
900 {
901         if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 55))
902                 fakewa_disable_nestedbb_mode(engine, wal);
903
904         gen12_ctx_gt_mocs_init(engine, wal);
905 }
906
907 static void
908 __intel_engine_init_ctx_wa(struct intel_engine_cs *engine,
909                            struct i915_wa_list *wal,
910                            const char *name)
911 {
912         struct drm_i915_private *i915 = engine->i915;
913
914         wa_init_start(wal, engine->gt, name, engine->name);
915
916         /* Applies to all engines */
917         /*
918          * Fake workarounds are not actual workarounds but
919          * programming of context registers using the workaround framework.
920          */
921         if (GRAPHICS_VER(i915) >= 12)
922                 gen12_ctx_gt_fake_wa_init(engine, wal);
923
924         if (engine->class != RENDER_CLASS)
925                 goto done;
926
927         if (IS_METEORLAKE(i915))
928                 mtl_ctx_workarounds_init(engine, wal);
929         else if (IS_PONTEVECCHIO(i915))
930                 ; /* noop; none at this time */
931         else if (IS_DG2(i915))
932                 dg2_ctx_workarounds_init(engine, wal);
933         else if (IS_XEHPSDV(i915))
934                 ; /* noop; none at this time */
935         else if (IS_DG1(i915))
936                 dg1_ctx_workarounds_init(engine, wal);
937         else if (GRAPHICS_VER(i915) == 12)
938                 gen12_ctx_workarounds_init(engine, wal);
939         else if (GRAPHICS_VER(i915) == 11)
940                 icl_ctx_workarounds_init(engine, wal);
941         else if (IS_COFFEELAKE(i915) || IS_COMETLAKE(i915))
942                 cfl_ctx_workarounds_init(engine, wal);
943         else if (IS_GEMINILAKE(i915))
944                 glk_ctx_workarounds_init(engine, wal);
945         else if (IS_KABYLAKE(i915))
946                 kbl_ctx_workarounds_init(engine, wal);
947         else if (IS_BROXTON(i915))
948                 bxt_ctx_workarounds_init(engine, wal);
949         else if (IS_SKYLAKE(i915))
950                 skl_ctx_workarounds_init(engine, wal);
951         else if (IS_CHERRYVIEW(i915))
952                 chv_ctx_workarounds_init(engine, wal);
953         else if (IS_BROADWELL(i915))
954                 bdw_ctx_workarounds_init(engine, wal);
955         else if (GRAPHICS_VER(i915) == 7)
956                 gen7_ctx_workarounds_init(engine, wal);
957         else if (GRAPHICS_VER(i915) == 6)
958                 gen6_ctx_workarounds_init(engine, wal);
959         else if (GRAPHICS_VER(i915) < 8)
960                 ;
961         else
962                 MISSING_CASE(GRAPHICS_VER(i915));
963
964 done:
965         wa_init_finish(wal);
966 }
967
968 void intel_engine_init_ctx_wa(struct intel_engine_cs *engine)
969 {
970         __intel_engine_init_ctx_wa(engine, &engine->ctx_wa_list, "context");
971 }
972
973 int intel_engine_emit_ctx_wa(struct i915_request *rq)
974 {
975         struct i915_wa_list *wal = &rq->engine->ctx_wa_list;
976         struct i915_wa *wa;
977         unsigned int i;
978         u32 *cs;
979         int ret;
980
981         if (wal->count == 0)
982                 return 0;
983
984         ret = rq->engine->emit_flush(rq, EMIT_BARRIER);
985         if (ret)
986                 return ret;
987
988         cs = intel_ring_begin(rq, (wal->count * 2 + 2));
989         if (IS_ERR(cs))
990                 return PTR_ERR(cs);
991
992         *cs++ = MI_LOAD_REGISTER_IMM(wal->count);
993         for (i = 0, wa = wal->list; i < wal->count; i++, wa++) {
994                 *cs++ = i915_mmio_reg_offset(wa->reg);
995                 *cs++ = wa->set;
996         }
997         *cs++ = MI_NOOP;
998
999         intel_ring_advance(rq, cs);
1000
1001         ret = rq->engine->emit_flush(rq, EMIT_BARRIER);
1002         if (ret)
1003                 return ret;
1004
1005         return 0;
1006 }
1007
1008 static void
1009 gen4_gt_workarounds_init(struct intel_gt *gt,
1010                          struct i915_wa_list *wal)
1011 {
1012         /* WaDisable_RenderCache_OperationalFlush:gen4,ilk */
1013         wa_masked_dis(wal, CACHE_MODE_0, RC_OP_FLUSH_ENABLE);
1014 }
1015
1016 static void
1017 g4x_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
1018 {
1019         gen4_gt_workarounds_init(gt, wal);
1020
1021         /* WaDisableRenderCachePipelinedFlush:g4x,ilk */
1022         wa_masked_en(wal, CACHE_MODE_0, CM0_PIPELINED_RENDER_FLUSH_DISABLE);
1023 }
1024
1025 static void
1026 ilk_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
1027 {
1028         g4x_gt_workarounds_init(gt, wal);
1029
1030         wa_masked_en(wal, _3D_CHICKEN2, _3D_CHICKEN2_WM_READ_PIPELINED);
1031 }
1032
1033 static void
1034 snb_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
1035 {
1036 }
1037
1038 static void
1039 ivb_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
1040 {
1041         /* Apply the WaDisableRHWOOptimizationForRenderHang:ivb workaround. */
1042         wa_masked_dis(wal,
1043                       GEN7_COMMON_SLICE_CHICKEN1,
1044                       GEN7_CSC1_RHWO_OPT_DISABLE_IN_RCC);
1045
1046         /* WaApplyL3ControlAndL3ChickenMode:ivb */
1047         wa_write(wal, GEN7_L3CNTLREG1, GEN7_WA_FOR_GEN7_L3_CONTROL);
1048         wa_write(wal, GEN7_L3_CHICKEN_MODE_REGISTER, GEN7_WA_L3_CHICKEN_MODE);
1049
1050         /* WaForceL3Serialization:ivb */
1051         wa_write_clr(wal, GEN7_L3SQCREG4, L3SQ_URB_READ_CAM_MATCH_DISABLE);
1052 }
1053
1054 static void
1055 vlv_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
1056 {
1057         /* WaForceL3Serialization:vlv */
1058         wa_write_clr(wal, GEN7_L3SQCREG4, L3SQ_URB_READ_CAM_MATCH_DISABLE);
1059
1060         /*
1061          * WaIncreaseL3CreditsForVLVB0:vlv
1062          * This is the hardware default actually.
1063          */
1064         wa_write(wal, GEN7_L3SQCREG1, VLV_B0_WA_L3SQCREG1_VALUE);
1065 }
1066
1067 static void
1068 hsw_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
1069 {
1070         /* L3 caching of data atomics doesn't work -- disable it. */
1071         wa_write(wal, HSW_SCRATCH1, HSW_SCRATCH1_L3_DATA_ATOMICS_DISABLE);
1072
1073         wa_add(wal,
1074                HSW_ROW_CHICKEN3, 0,
1075                _MASKED_BIT_ENABLE(HSW_ROW_CHICKEN3_L3_GLOBAL_ATOMICS_DISABLE),
1076                0 /* XXX does this reg exist? */, true);
1077
1078         /* WaVSRefCountFullforceMissDisable:hsw */
1079         wa_write_clr(wal, GEN7_FF_THREAD_MODE, GEN7_FF_VS_REF_CNT_FFME);
1080 }
1081
1082 static void
1083 gen9_wa_init_mcr(struct drm_i915_private *i915, struct i915_wa_list *wal)
1084 {
1085         const struct sseu_dev_info *sseu = &to_gt(i915)->info.sseu;
1086         unsigned int slice, subslice;
1087         u32 mcr, mcr_mask;
1088
1089         GEM_BUG_ON(GRAPHICS_VER(i915) != 9);
1090
1091         /*
1092          * WaProgramMgsrForCorrectSliceSpecificMmioReads:gen9,glk,kbl,cml
1093          * Before any MMIO read into slice/subslice specific registers, MCR
1094          * packet control register needs to be programmed to point to any
1095          * enabled s/ss pair. Otherwise, incorrect values will be returned.
1096          * This means each subsequent MMIO read will be forwarded to a
1097          * specific s/ss combination, but this is OK since these registers
1098          * are consistent across s/ss in almost all cases. On the rare
1099          * occasions, such as INSTDONE, where this value is dependent
1100          * on s/ss combo, the read should be done with read_subslice_reg.
1101          */
1102         slice = ffs(sseu->slice_mask) - 1;
1103         GEM_BUG_ON(slice >= ARRAY_SIZE(sseu->subslice_mask.hsw));
1104         subslice = ffs(intel_sseu_get_hsw_subslices(sseu, slice));
1105         GEM_BUG_ON(!subslice);
1106         subslice--;
1107
1108         /*
1109          * We use GEN8_MCR..() macros to calculate the |mcr| value for
1110          * Gen9 to address WaProgramMgsrForCorrectSliceSpecificMmioReads
1111          */
1112         mcr = GEN8_MCR_SLICE(slice) | GEN8_MCR_SUBSLICE(subslice);
1113         mcr_mask = GEN8_MCR_SLICE_MASK | GEN8_MCR_SUBSLICE_MASK;
1114
1115         drm_dbg(&i915->drm, "MCR slice:%d/subslice:%d = %x\n", slice, subslice, mcr);
1116
1117         wa_write_clr_set(wal, GEN8_MCR_SELECTOR, mcr_mask, mcr);
1118 }
1119
1120 static void
1121 gen9_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
1122 {
1123         struct drm_i915_private *i915 = gt->i915;
1124
1125         /* WaProgramMgsrForCorrectSliceSpecificMmioReads:glk,kbl,cml,gen9 */
1126         gen9_wa_init_mcr(i915, wal);
1127
1128         /* WaDisableKillLogic:bxt,skl,kbl */
1129         if (!IS_COFFEELAKE(i915) && !IS_COMETLAKE(i915))
1130                 wa_write_or(wal,
1131                             GAM_ECOCHK,
1132                             ECOCHK_DIS_TLB);
1133
1134         if (HAS_LLC(i915)) {
1135                 /* WaCompressedResourceSamplerPbeMediaNewHashMode:skl,kbl
1136                  *
1137                  * Must match Display Engine. See
1138                  * WaCompressedResourceDisplayNewHashMode.
1139                  */
1140                 wa_write_or(wal,
1141                             MMCD_MISC_CTRL,
1142                             MMCD_PCLA | MMCD_HOTSPOT_EN);
1143         }
1144
1145         /* WaDisableHDCInvalidation:skl,bxt,kbl,cfl */
1146         wa_write_or(wal,
1147                     GAM_ECOCHK,
1148                     BDW_DISABLE_HDC_INVALIDATION);
1149 }
1150
1151 static void
1152 skl_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
1153 {
1154         gen9_gt_workarounds_init(gt, wal);
1155
1156         /* WaDisableGafsUnitClkGating:skl */
1157         wa_write_or(wal,
1158                     GEN7_UCGCTL4,
1159                     GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE);
1160
1161         /* WaInPlaceDecompressionHang:skl */
1162         if (IS_SKL_GRAPHICS_STEP(gt->i915, STEP_A0, STEP_H0))
1163                 wa_write_or(wal,
1164                             GEN9_GAMT_ECO_REG_RW_IA,
1165                             GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
1166 }
1167
1168 static void
1169 kbl_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
1170 {
1171         gen9_gt_workarounds_init(gt, wal);
1172
1173         /* WaDisableDynamicCreditSharing:kbl */
1174         if (IS_KBL_GRAPHICS_STEP(gt->i915, 0, STEP_C0))
1175                 wa_write_or(wal,
1176                             GAMT_CHKN_BIT_REG,
1177                             GAMT_CHKN_DISABLE_DYNAMIC_CREDIT_SHARING);
1178
1179         /* WaDisableGafsUnitClkGating:kbl */
1180         wa_write_or(wal,
1181                     GEN7_UCGCTL4,
1182                     GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE);
1183
1184         /* WaInPlaceDecompressionHang:kbl */
1185         wa_write_or(wal,
1186                     GEN9_GAMT_ECO_REG_RW_IA,
1187                     GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
1188 }
1189
1190 static void
1191 glk_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
1192 {
1193         gen9_gt_workarounds_init(gt, wal);
1194 }
1195
1196 static void
1197 cfl_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
1198 {
1199         gen9_gt_workarounds_init(gt, wal);
1200
1201         /* WaDisableGafsUnitClkGating:cfl */
1202         wa_write_or(wal,
1203                     GEN7_UCGCTL4,
1204                     GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE);
1205
1206         /* WaInPlaceDecompressionHang:cfl */
1207         wa_write_or(wal,
1208                     GEN9_GAMT_ECO_REG_RW_IA,
1209                     GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
1210 }
1211
1212 static void __set_mcr_steering(struct i915_wa_list *wal,
1213                                i915_reg_t steering_reg,
1214                                unsigned int slice, unsigned int subslice)
1215 {
1216         u32 mcr, mcr_mask;
1217
1218         mcr = GEN11_MCR_SLICE(slice) | GEN11_MCR_SUBSLICE(subslice);
1219         mcr_mask = GEN11_MCR_SLICE_MASK | GEN11_MCR_SUBSLICE_MASK;
1220
1221         wa_write_clr_set(wal, steering_reg, mcr_mask, mcr);
1222 }
1223
1224 static void debug_dump_steering(struct intel_gt *gt)
1225 {
1226         struct drm_printer p = drm_debug_printer("MCR Steering:");
1227
1228         if (drm_debug_enabled(DRM_UT_DRIVER))
1229                 intel_gt_mcr_report_steering(&p, gt, false);
1230 }
1231
1232 static void __add_mcr_wa(struct intel_gt *gt, struct i915_wa_list *wal,
1233                          unsigned int slice, unsigned int subslice)
1234 {
1235         __set_mcr_steering(wal, GEN8_MCR_SELECTOR, slice, subslice);
1236
1237         gt->default_steering.groupid = slice;
1238         gt->default_steering.instanceid = subslice;
1239
1240         debug_dump_steering(gt);
1241 }
1242
1243 static void
1244 icl_wa_init_mcr(struct intel_gt *gt, struct i915_wa_list *wal)
1245 {
1246         const struct sseu_dev_info *sseu = &gt->info.sseu;
1247         unsigned int subslice;
1248
1249         GEM_BUG_ON(GRAPHICS_VER(gt->i915) < 11);
1250         GEM_BUG_ON(hweight8(sseu->slice_mask) > 1);
1251
1252         /*
1253          * Although a platform may have subslices, we need to always steer
1254          * reads to the lowest instance that isn't fused off.  When Render
1255          * Power Gating is enabled, grabbing forcewake will only power up a
1256          * single subslice (the "minconfig") if there isn't a real workload
1257          * that needs to be run; this means that if we steer register reads to
1258          * one of the higher subslices, we run the risk of reading back 0's or
1259          * random garbage.
1260          */
1261         subslice = __ffs(intel_sseu_get_hsw_subslices(sseu, 0));
1262
1263         /*
1264          * If the subslice we picked above also steers us to a valid L3 bank,
1265          * then we can just rely on the default steering and won't need to
1266          * worry about explicitly re-steering L3BANK reads later.
1267          */
1268         if (gt->info.l3bank_mask & BIT(subslice))
1269                 gt->steering_table[L3BANK] = NULL;
1270
1271         __add_mcr_wa(gt, wal, 0, subslice);
1272 }
1273
1274 static void
1275 xehp_init_mcr(struct intel_gt *gt, struct i915_wa_list *wal)
1276 {
1277         const struct sseu_dev_info *sseu = &gt->info.sseu;
1278         unsigned long slice, subslice = 0, slice_mask = 0;
1279         u32 lncf_mask = 0;
1280         int i;
1281
1282         /*
1283          * On Xe_HP the steering increases in complexity. There are now several
1284          * more units that require steering and we're not guaranteed to be able
1285          * to find a common setting for all of them. These are:
1286          * - GSLICE (fusable)
1287          * - DSS (sub-unit within gslice; fusable)
1288          * - L3 Bank (fusable)
1289          * - MSLICE (fusable)
1290          * - LNCF (sub-unit within mslice; always present if mslice is present)
1291          *
1292          * We'll do our default/implicit steering based on GSLICE (in the
1293          * sliceid field) and DSS (in the subsliceid field).  If we can
1294          * find overlap between the valid MSLICE and/or LNCF values with
1295          * a suitable GSLICE, then we can just re-use the default value and
1296          * skip any explicit steering at runtime.
1297          *
1298          * We only need to look for overlap between GSLICE/MSLICE/LNCF to find
1299          * a valid sliceid value.  DSS steering is the only type of steering
1300          * that utilizes the 'subsliceid' bits.
1301          *
1302          * Also note that, even though the steering domain is called "GSlice"
1303          * and it is encoded in the register using the gslice format, the spec
1304          * says that the combined (geometry | compute) fuse should be used to
1305          * select the steering.
1306          */
1307
1308         /* Find the potential gslice candidates */
1309         slice_mask = intel_slicemask_from_xehp_dssmask(sseu->subslice_mask,
1310                                                        GEN_DSS_PER_GSLICE);
1311
1312         /*
1313          * Find the potential LNCF candidates.  Either LNCF within a valid
1314          * mslice is fine.
1315          */
1316         for_each_set_bit(i, &gt->info.mslice_mask, GEN12_MAX_MSLICES)
1317                 lncf_mask |= (0x3 << (i * 2));
1318
1319         /*
1320          * Are there any sliceid values that work for both GSLICE and LNCF
1321          * steering?
1322          */
1323         if (slice_mask & lncf_mask) {
1324                 slice_mask &= lncf_mask;
1325                 gt->steering_table[LNCF] = NULL;
1326         }
1327
1328         /* How about sliceid values that also work for MSLICE steering? */
1329         if (slice_mask & gt->info.mslice_mask) {
1330                 slice_mask &= gt->info.mslice_mask;
1331                 gt->steering_table[MSLICE] = NULL;
1332         }
1333
1334         if (IS_XEHPSDV(gt->i915) && slice_mask & BIT(0))
1335                 gt->steering_table[GAM] = NULL;
1336
1337         slice = __ffs(slice_mask);
1338         subslice = intel_sseu_find_first_xehp_dss(sseu, GEN_DSS_PER_GSLICE, slice) %
1339                 GEN_DSS_PER_GSLICE;
1340
1341         __add_mcr_wa(gt, wal, slice, subslice);
1342
1343         /*
1344          * SQIDI ranges are special because they use different steering
1345          * registers than everything else we work with.  On XeHP SDV and
1346          * DG2-G10, any value in the steering registers will work fine since
1347          * all instances are present, but DG2-G11 only has SQIDI instances at
1348          * IDs 2 and 3, so we need to steer to one of those.  For simplicity
1349          * we'll just steer to a hardcoded "2" since that value will work
1350          * everywhere.
1351          */
1352         __set_mcr_steering(wal, MCFG_MCR_SELECTOR, 0, 2);
1353         __set_mcr_steering(wal, SF_MCR_SELECTOR, 0, 2);
1354
1355         /*
1356          * On DG2, GAM registers have a dedicated steering control register
1357          * and must always be programmed to a hardcoded groupid of "1."
1358          */
1359         if (IS_DG2(gt->i915))
1360                 __set_mcr_steering(wal, GAM_MCR_SELECTOR, 1, 0);
1361 }
1362
1363 static void
1364 pvc_init_mcr(struct intel_gt *gt, struct i915_wa_list *wal)
1365 {
1366         unsigned int dss;
1367
1368         /*
1369          * Setup implicit steering for COMPUTE and DSS ranges to the first
1370          * non-fused-off DSS.  All other types of MCR registers will be
1371          * explicitly steered.
1372          */
1373         dss = intel_sseu_find_first_xehp_dss(&gt->info.sseu, 0, 0);
1374         __add_mcr_wa(gt, wal, dss / GEN_DSS_PER_CSLICE, dss % GEN_DSS_PER_CSLICE);
1375 }
1376
1377 static void
1378 icl_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
1379 {
1380         struct drm_i915_private *i915 = gt->i915;
1381
1382         icl_wa_init_mcr(gt, wal);
1383
1384         /* WaModifyGamTlbPartitioning:icl */
1385         wa_write_clr_set(wal,
1386                          GEN11_GACB_PERF_CTRL,
1387                          GEN11_HASH_CTRL_MASK,
1388                          GEN11_HASH_CTRL_BIT0 | GEN11_HASH_CTRL_BIT4);
1389
1390         /* Wa_1405766107:icl
1391          * Formerly known as WaCL2SFHalfMaxAlloc
1392          */
1393         wa_write_or(wal,
1394                     GEN11_LSN_UNSLCVC,
1395                     GEN11_LSN_UNSLCVC_GAFS_HALF_SF_MAXALLOC |
1396                     GEN11_LSN_UNSLCVC_GAFS_HALF_CL2_MAXALLOC);
1397
1398         /* Wa_220166154:icl
1399          * Formerly known as WaDisCtxReload
1400          */
1401         wa_write_or(wal,
1402                     GEN8_GAMW_ECO_DEV_RW_IA,
1403                     GAMW_ECO_DEV_CTX_RELOAD_DISABLE);
1404
1405         /* Wa_1406463099:icl
1406          * Formerly known as WaGamTlbPendError
1407          */
1408         wa_write_or(wal,
1409                     GAMT_CHKN_BIT_REG,
1410                     GAMT_CHKN_DISABLE_L3_COH_PIPE);
1411
1412         /*
1413          * Wa_1408615072:icl,ehl  (vsunit)
1414          * Wa_1407596294:icl,ehl  (hsunit)
1415          */
1416         wa_write_or(wal, UNSLICE_UNIT_LEVEL_CLKGATE,
1417                     VSUNIT_CLKGATE_DIS | HSUNIT_CLKGATE_DIS);
1418
1419         /* Wa_1407352427:icl,ehl */
1420         wa_write_or(wal, UNSLICE_UNIT_LEVEL_CLKGATE2,
1421                     PSDUNIT_CLKGATE_DIS);
1422
1423         /* Wa_1406680159:icl,ehl */
1424         wa_mcr_write_or(wal,
1425                         GEN11_SUBSLICE_UNIT_LEVEL_CLKGATE,
1426                         GWUNIT_CLKGATE_DIS);
1427
1428         /* Wa_1607087056:icl,ehl,jsl */
1429         if (IS_ICELAKE(i915) ||
1430             IS_JSL_EHL_GRAPHICS_STEP(i915, STEP_A0, STEP_B0))
1431                 wa_write_or(wal,
1432                             GEN11_SLICE_UNIT_LEVEL_CLKGATE,
1433                             L3_CLKGATE_DIS | L3_CR2X_CLKGATE_DIS);
1434
1435         /*
1436          * This is not a documented workaround, but rather an optimization
1437          * to reduce sampler power.
1438          */
1439         wa_mcr_write_clr(wal, GEN10_DFR_RATIO_EN_AND_CHICKEN, DFR_DISABLE);
1440 }
1441
1442 /*
1443  * Though there are per-engine instances of these registers,
1444  * they retain their value through engine resets and should
1445  * only be provided on the GT workaround list rather than
1446  * the engine-specific workaround list.
1447  */
1448 static void
1449 wa_14011060649(struct intel_gt *gt, struct i915_wa_list *wal)
1450 {
1451         struct intel_engine_cs *engine;
1452         int id;
1453
1454         for_each_engine(engine, gt, id) {
1455                 if (engine->class != VIDEO_DECODE_CLASS ||
1456                     (engine->instance % 2))
1457                         continue;
1458
1459                 wa_write_or(wal, VDBOX_CGCTL3F10(engine->mmio_base),
1460                             IECPUNIT_CLKGATE_DIS);
1461         }
1462 }
1463
1464 static void
1465 gen12_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
1466 {
1467         icl_wa_init_mcr(gt, wal);
1468
1469         /* Wa_14011060649:tgl,rkl,dg1,adl-s,adl-p */
1470         wa_14011060649(gt, wal);
1471
1472         /* Wa_14011059788:tgl,rkl,adl-s,dg1,adl-p */
1473         wa_mcr_write_or(wal, GEN10_DFR_RATIO_EN_AND_CHICKEN, DFR_DISABLE);
1474 }
1475
1476 static void
1477 dg1_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
1478 {
1479         gen12_gt_workarounds_init(gt, wal);
1480
1481         /* Wa_1409420604:dg1 */
1482         wa_mcr_write_or(wal, SUBSLICE_UNIT_LEVEL_CLKGATE2,
1483                         CPSSUNIT_CLKGATE_DIS);
1484
1485         /* Wa_1408615072:dg1 */
1486         /* Empirical testing shows this register is unaffected by engine reset. */
1487         wa_write_or(wal, UNSLICE_UNIT_LEVEL_CLKGATE2, VSUNIT_CLKGATE_DIS_TGL);
1488 }
1489
1490 static void
1491 xehpsdv_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
1492 {
1493         struct drm_i915_private *i915 = gt->i915;
1494
1495         xehp_init_mcr(gt, wal);
1496
1497         /* Wa_1409757795:xehpsdv */
1498         wa_mcr_write_or(wal, SCCGCTL94DC, CG3DDISURB);
1499
1500         /* Wa_18011725039:xehpsdv */
1501         if (IS_XEHPSDV_GRAPHICS_STEP(i915, STEP_A1, STEP_B0)) {
1502                 wa_mcr_masked_dis(wal, MLTICTXCTL, TDONRENDER);
1503                 wa_mcr_write_or(wal, L3SQCREG1_CCS0, FLUSHALLNONCOH);
1504         }
1505
1506         /* Wa_16011155590:xehpsdv */
1507         if (IS_XEHPSDV_GRAPHICS_STEP(i915, STEP_A0, STEP_B0))
1508                 wa_write_or(wal, UNSLICE_UNIT_LEVEL_CLKGATE,
1509                             TSGUNIT_CLKGATE_DIS);
1510
1511         /* Wa_14011780169:xehpsdv */
1512         if (IS_XEHPSDV_GRAPHICS_STEP(i915, STEP_B0, STEP_FOREVER)) {
1513                 wa_write_or(wal, UNSLCGCTL9440, GAMTLBOACS_CLKGATE_DIS |
1514                             GAMTLBVDBOX7_CLKGATE_DIS |
1515                             GAMTLBVDBOX6_CLKGATE_DIS |
1516                             GAMTLBVDBOX5_CLKGATE_DIS |
1517                             GAMTLBVDBOX4_CLKGATE_DIS |
1518                             GAMTLBVDBOX3_CLKGATE_DIS |
1519                             GAMTLBVDBOX2_CLKGATE_DIS |
1520                             GAMTLBVDBOX1_CLKGATE_DIS |
1521                             GAMTLBVDBOX0_CLKGATE_DIS |
1522                             GAMTLBKCR_CLKGATE_DIS |
1523                             GAMTLBGUC_CLKGATE_DIS |
1524                             GAMTLBBLT_CLKGATE_DIS);
1525                 wa_write_or(wal, UNSLCGCTL9444, GAMTLBGFXA0_CLKGATE_DIS |
1526                             GAMTLBGFXA1_CLKGATE_DIS |
1527                             GAMTLBCOMPA0_CLKGATE_DIS |
1528                             GAMTLBCOMPA1_CLKGATE_DIS |
1529                             GAMTLBCOMPB0_CLKGATE_DIS |
1530                             GAMTLBCOMPB1_CLKGATE_DIS |
1531                             GAMTLBCOMPC0_CLKGATE_DIS |
1532                             GAMTLBCOMPC1_CLKGATE_DIS |
1533                             GAMTLBCOMPD0_CLKGATE_DIS |
1534                             GAMTLBCOMPD1_CLKGATE_DIS |
1535                             GAMTLBMERT_CLKGATE_DIS   |
1536                             GAMTLBVEBOX3_CLKGATE_DIS |
1537                             GAMTLBVEBOX2_CLKGATE_DIS |
1538                             GAMTLBVEBOX1_CLKGATE_DIS |
1539                             GAMTLBVEBOX0_CLKGATE_DIS);
1540         }
1541
1542         /* Wa_16012725990:xehpsdv */
1543         if (IS_XEHPSDV_GRAPHICS_STEP(i915, STEP_A1, STEP_FOREVER))
1544                 wa_write_or(wal, UNSLICE_UNIT_LEVEL_CLKGATE, VFUNIT_CLKGATE_DIS);
1545
1546         /* Wa_14011060649:xehpsdv */
1547         wa_14011060649(gt, wal);
1548
1549         /* Wa_14012362059:xehpsdv */
1550         wa_mcr_write_or(wal, XEHP_MERT_MOD_CTRL, FORCE_MISS_FTLB);
1551
1552         /* Wa_14014368820:xehpsdv */
1553         wa_mcr_write_or(wal, XEHP_GAMCNTRL_CTRL,
1554                         INVALIDATION_BROADCAST_MODE_DIS | GLOBAL_INVALIDATION_MODE);
1555
1556         /* Wa_14010670810:xehpsdv */
1557         wa_mcr_write_or(wal, XEHP_L3NODEARBCFG, XEHP_LNESPARE);
1558 }
1559
1560 static void
1561 dg2_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
1562 {
1563         struct intel_engine_cs *engine;
1564         int id;
1565
1566         xehp_init_mcr(gt, wal);
1567
1568         /* Wa_14011060649:dg2 */
1569         wa_14011060649(gt, wal);
1570
1571         /*
1572          * Although there are per-engine instances of these registers,
1573          * they technically exist outside the engine itself and are not
1574          * impacted by engine resets.  Furthermore, they're part of the
1575          * GuC blacklist so trying to treat them as engine workarounds
1576          * will result in GuC initialization failure and a wedged GPU.
1577          */
1578         for_each_engine(engine, gt, id) {
1579                 if (engine->class != VIDEO_DECODE_CLASS)
1580                         continue;
1581
1582                 /* Wa_16010515920:dg2_g10 */
1583                 if (IS_DG2_GRAPHICS_STEP(gt->i915, G10, STEP_A0, STEP_B0))
1584                         wa_write_or(wal, VDBOX_CGCTL3F18(engine->mmio_base),
1585                                     ALNUNIT_CLKGATE_DIS);
1586         }
1587
1588         if (IS_DG2_G10(gt->i915)) {
1589                 /* Wa_22010523718:dg2 */
1590                 wa_write_or(wal, UNSLICE_UNIT_LEVEL_CLKGATE,
1591                             CG3DDISCFEG_CLKGATE_DIS);
1592
1593                 /* Wa_14011006942:dg2 */
1594                 wa_mcr_write_or(wal, GEN11_SUBSLICE_UNIT_LEVEL_CLKGATE,
1595                                 DSS_ROUTER_CLKGATE_DIS);
1596         }
1597
1598         if (IS_DG2_GRAPHICS_STEP(gt->i915, G10, STEP_A0, STEP_B0) ||
1599             IS_DG2_GRAPHICS_STEP(gt->i915, G11, STEP_A0, STEP_B0)) {
1600                 /* Wa_14012362059:dg2 */
1601                 wa_mcr_write_or(wal, XEHP_MERT_MOD_CTRL, FORCE_MISS_FTLB);
1602         }
1603
1604         if (IS_DG2_GRAPHICS_STEP(gt->i915, G10, STEP_A0, STEP_B0)) {
1605                 /* Wa_14010948348:dg2_g10 */
1606                 wa_write_or(wal, UNSLCGCTL9430, MSQDUNIT_CLKGATE_DIS);
1607
1608                 /* Wa_14011037102:dg2_g10 */
1609                 wa_write_or(wal, UNSLCGCTL9444, LTCDD_CLKGATE_DIS);
1610
1611                 /* Wa_14011371254:dg2_g10 */
1612                 wa_mcr_write_or(wal, XEHP_SLICE_UNIT_LEVEL_CLKGATE, NODEDSS_CLKGATE_DIS);
1613
1614                 /* Wa_14011431319:dg2_g10 */
1615                 wa_write_or(wal, UNSLCGCTL9440, GAMTLBOACS_CLKGATE_DIS |
1616                             GAMTLBVDBOX7_CLKGATE_DIS |
1617                             GAMTLBVDBOX6_CLKGATE_DIS |
1618                             GAMTLBVDBOX5_CLKGATE_DIS |
1619                             GAMTLBVDBOX4_CLKGATE_DIS |
1620                             GAMTLBVDBOX3_CLKGATE_DIS |
1621                             GAMTLBVDBOX2_CLKGATE_DIS |
1622                             GAMTLBVDBOX1_CLKGATE_DIS |
1623                             GAMTLBVDBOX0_CLKGATE_DIS |
1624                             GAMTLBKCR_CLKGATE_DIS |
1625                             GAMTLBGUC_CLKGATE_DIS |
1626                             GAMTLBBLT_CLKGATE_DIS);
1627                 wa_write_or(wal, UNSLCGCTL9444, GAMTLBGFXA0_CLKGATE_DIS |
1628                             GAMTLBGFXA1_CLKGATE_DIS |
1629                             GAMTLBCOMPA0_CLKGATE_DIS |
1630                             GAMTLBCOMPA1_CLKGATE_DIS |
1631                             GAMTLBCOMPB0_CLKGATE_DIS |
1632                             GAMTLBCOMPB1_CLKGATE_DIS |
1633                             GAMTLBCOMPC0_CLKGATE_DIS |
1634                             GAMTLBCOMPC1_CLKGATE_DIS |
1635                             GAMTLBCOMPD0_CLKGATE_DIS |
1636                             GAMTLBCOMPD1_CLKGATE_DIS |
1637                             GAMTLBMERT_CLKGATE_DIS   |
1638                             GAMTLBVEBOX3_CLKGATE_DIS |
1639                             GAMTLBVEBOX2_CLKGATE_DIS |
1640                             GAMTLBVEBOX1_CLKGATE_DIS |
1641                             GAMTLBVEBOX0_CLKGATE_DIS);
1642
1643                 /* Wa_14010569222:dg2_g10 */
1644                 wa_write_or(wal, UNSLICE_UNIT_LEVEL_CLKGATE,
1645                             GAMEDIA_CLKGATE_DIS);
1646
1647                 /* Wa_14011028019:dg2_g10 */
1648                 wa_mcr_write_or(wal, SSMCGCTL9530, RTFUNIT_CLKGATE_DIS);
1649
1650                 /* Wa_14010680813:dg2_g10 */
1651                 wa_mcr_write_or(wal, XEHP_GAMSTLB_CTRL,
1652                                 CONTROL_BLOCK_CLKGATE_DIS |
1653                                 EGRESS_BLOCK_CLKGATE_DIS |
1654                                 TAG_BLOCK_CLKGATE_DIS);
1655         }
1656
1657         /* Wa_14014830051:dg2 */
1658         wa_mcr_write_clr(wal, SARB_CHICKEN1, COMP_CKN_IN);
1659
1660         /* Wa_14015795083 */
1661         wa_write_clr(wal, GEN7_MISCCPCTL, GEN12_DOP_CLOCK_GATE_RENDER_ENABLE);
1662
1663         /* Wa_18018781329 */
1664         wa_mcr_write_or(wal, RENDER_MOD_CTRL, FORCE_MISS_FTLB);
1665         wa_mcr_write_or(wal, COMP_MOD_CTRL, FORCE_MISS_FTLB);
1666         wa_mcr_write_or(wal, XEHP_VDBX_MOD_CTRL, FORCE_MISS_FTLB);
1667         wa_mcr_write_or(wal, XEHP_VEBX_MOD_CTRL, FORCE_MISS_FTLB);
1668
1669         /* Wa_1509235366:dg2 */
1670         wa_mcr_write_or(wal, XEHP_GAMCNTRL_CTRL,
1671                         INVALIDATION_BROADCAST_MODE_DIS | GLOBAL_INVALIDATION_MODE);
1672
1673         /* Wa_14010648519:dg2 */
1674         wa_mcr_write_or(wal, XEHP_L3NODEARBCFG, XEHP_LNESPARE);
1675 }
1676
1677 static void
1678 pvc_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
1679 {
1680         pvc_init_mcr(gt, wal);
1681
1682         /* Wa_14015795083 */
1683         wa_write_clr(wal, GEN7_MISCCPCTL, GEN12_DOP_CLOCK_GATE_RENDER_ENABLE);
1684
1685         /* Wa_18018781329 */
1686         wa_mcr_write_or(wal, RENDER_MOD_CTRL, FORCE_MISS_FTLB);
1687         wa_mcr_write_or(wal, COMP_MOD_CTRL, FORCE_MISS_FTLB);
1688         wa_mcr_write_or(wal, XEHP_VDBX_MOD_CTRL, FORCE_MISS_FTLB);
1689         wa_mcr_write_or(wal, XEHP_VEBX_MOD_CTRL, FORCE_MISS_FTLB);
1690
1691         /* Wa_16016694945 */
1692         wa_mcr_masked_en(wal, XEHPC_LNCFMISCCFGREG0, XEHPC_OVRLSCCC);
1693 }
1694
1695 static void
1696 xelpg_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
1697 {
1698         if (IS_MTL_GRAPHICS_STEP(gt->i915, M, STEP_A0, STEP_B0) ||
1699             IS_MTL_GRAPHICS_STEP(gt->i915, P, STEP_A0, STEP_B0)) {
1700                 /* Wa_14014830051 */
1701                 wa_mcr_write_clr(wal, SARB_CHICKEN1, COMP_CKN_IN);
1702
1703                 /* Wa_18018781329 */
1704                 wa_mcr_write_or(wal, RENDER_MOD_CTRL, FORCE_MISS_FTLB);
1705                 wa_mcr_write_or(wal, COMP_MOD_CTRL, FORCE_MISS_FTLB);
1706         }
1707
1708         /*
1709          * Unlike older platforms, we no longer set up implicit steering here;
1710          * all MCR accesses are explicitly steered.
1711          */
1712         debug_dump_steering(gt);
1713 }
1714
1715 static void
1716 xelpmp_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
1717 {
1718         if (IS_MTL_MEDIA_STEP(gt->i915, STEP_A0, STEP_B0)) {
1719                 /*
1720                  * Wa_18018781329
1721                  *
1722                  * Note that although these registers are MCR on the primary
1723                  * GT, the media GT's versions are regular singleton registers.
1724                  */
1725                 wa_write_or(wal, XELPMP_GSC_MOD_CTRL, FORCE_MISS_FTLB);
1726                 wa_write_or(wal, XELPMP_VDBX_MOD_CTRL, FORCE_MISS_FTLB);
1727                 wa_write_or(wal, XELPMP_VEBX_MOD_CTRL, FORCE_MISS_FTLB);
1728         }
1729
1730         debug_dump_steering(gt);
1731 }
1732
1733 /*
1734  * The bspec performance guide has recommended MMIO tuning settings.  These
1735  * aren't truly "workarounds" but we want to program them through the
1736  * workaround infrastructure to make sure they're (re)applied at the proper
1737  * times.
1738  *
1739  * The programming in this function is for settings that persist through
1740  * engine resets and also are not part of any engine's register state context.
1741  * I.e., settings that only need to be re-applied in the event of a full GT
1742  * reset.
1743  */
1744 static void gt_tuning_settings(struct intel_gt *gt, struct i915_wa_list *wal)
1745 {
1746         if (IS_PONTEVECCHIO(gt->i915)) {
1747                 wa_mcr_write(wal, XEHPC_L3SCRUB,
1748                              SCRUB_CL_DWNGRADE_SHARED | SCRUB_RATE_4B_PER_CLK);
1749                 wa_mcr_masked_en(wal, XEHPC_LNCFMISCCFGREG0, XEHPC_HOSTCACHEEN);
1750         }
1751
1752         if (IS_DG2(gt->i915)) {
1753                 wa_mcr_write_or(wal, XEHP_L3SCQREG7, BLEND_FILL_CACHING_OPT_DIS);
1754                 wa_mcr_write_or(wal, XEHP_SQCM, EN_32B_ACCESS);
1755         }
1756 }
1757
1758 static void
1759 gt_init_workarounds(struct intel_gt *gt, struct i915_wa_list *wal)
1760 {
1761         struct drm_i915_private *i915 = gt->i915;
1762
1763         gt_tuning_settings(gt, wal);
1764
1765         if (gt->type == GT_MEDIA) {
1766                 if (MEDIA_VER(i915) >= 13)
1767                         xelpmp_gt_workarounds_init(gt, wal);
1768                 else
1769                         MISSING_CASE(MEDIA_VER(i915));
1770
1771                 return;
1772         }
1773
1774         if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70))
1775                 xelpg_gt_workarounds_init(gt, wal);
1776         else if (IS_PONTEVECCHIO(i915))
1777                 pvc_gt_workarounds_init(gt, wal);
1778         else if (IS_DG2(i915))
1779                 dg2_gt_workarounds_init(gt, wal);
1780         else if (IS_XEHPSDV(i915))
1781                 xehpsdv_gt_workarounds_init(gt, wal);
1782         else if (IS_DG1(i915))
1783                 dg1_gt_workarounds_init(gt, wal);
1784         else if (GRAPHICS_VER(i915) == 12)
1785                 gen12_gt_workarounds_init(gt, wal);
1786         else if (GRAPHICS_VER(i915) == 11)
1787                 icl_gt_workarounds_init(gt, wal);
1788         else if (IS_COFFEELAKE(i915) || IS_COMETLAKE(i915))
1789                 cfl_gt_workarounds_init(gt, wal);
1790         else if (IS_GEMINILAKE(i915))
1791                 glk_gt_workarounds_init(gt, wal);
1792         else if (IS_KABYLAKE(i915))
1793                 kbl_gt_workarounds_init(gt, wal);
1794         else if (IS_BROXTON(i915))
1795                 gen9_gt_workarounds_init(gt, wal);
1796         else if (IS_SKYLAKE(i915))
1797                 skl_gt_workarounds_init(gt, wal);
1798         else if (IS_HASWELL(i915))
1799                 hsw_gt_workarounds_init(gt, wal);
1800         else if (IS_VALLEYVIEW(i915))
1801                 vlv_gt_workarounds_init(gt, wal);
1802         else if (IS_IVYBRIDGE(i915))
1803                 ivb_gt_workarounds_init(gt, wal);
1804         else if (GRAPHICS_VER(i915) == 6)
1805                 snb_gt_workarounds_init(gt, wal);
1806         else if (GRAPHICS_VER(i915) == 5)
1807                 ilk_gt_workarounds_init(gt, wal);
1808         else if (IS_G4X(i915))
1809                 g4x_gt_workarounds_init(gt, wal);
1810         else if (GRAPHICS_VER(i915) == 4)
1811                 gen4_gt_workarounds_init(gt, wal);
1812         else if (GRAPHICS_VER(i915) <= 8)
1813                 ;
1814         else
1815                 MISSING_CASE(GRAPHICS_VER(i915));
1816 }
1817
1818 void intel_gt_init_workarounds(struct intel_gt *gt)
1819 {
1820         struct i915_wa_list *wal = &gt->wa_list;
1821
1822         wa_init_start(wal, gt, "GT", "global");
1823         gt_init_workarounds(gt, wal);
1824         wa_init_finish(wal);
1825 }
1826
1827 static enum forcewake_domains
1828 wal_get_fw_for_rmw(struct intel_uncore *uncore, const struct i915_wa_list *wal)
1829 {
1830         enum forcewake_domains fw = 0;
1831         struct i915_wa *wa;
1832         unsigned int i;
1833
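        /*
         * Accumulate the union of forcewake domains needed to read and write
         * every register in the list, so the caller can take them all with a
         * single forcewake get around the whole rmw pass.
         */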
1834         for (i = 0, wa = wal->list; i < wal->count; i++, wa++)
1835                 fw |= intel_uncore_forcewake_for_reg(uncore,
1836                                                      wa->reg,
1837                                                      FW_REG_READ |
1838                                                      FW_REG_WRITE);
1839
1840         return fw;
1841 }
1842
1843 static bool
1844 wa_verify(struct intel_gt *gt, const struct i915_wa *wa, u32 cur,
1845           const char *name, const char *from)
1846 {
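        /*
         * A workaround is reported as lost if any bit we care about (wa->read)
         * differs between the current value and the expected one.  For
         * example, with wa->set = 0x8000, wa->read = 0x8000 and cur = 0x0,
         * (cur ^ wa->set) & wa->read == 0x8000, so the mismatch is flagged.
         */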
1847         if ((cur ^ wa->set) & wa->read) {
1848                 drm_err(&gt->i915->drm,
1849                         "%s workaround lost on %s! (reg[%x]=0x%x, relevant bits were 0x%x vs expected 0x%x)\n",
1850                         name, from, i915_mmio_reg_offset(wa->reg),
1851                         cur, cur & wa->read, wa->set & wa->read);
1852
1853                 return false;
1854         }
1855
1856         return true;
1857 }
1858
1859 static void wa_list_apply(const struct i915_wa_list *wal)
1860 {
1861         struct intel_gt *gt = wal->gt;
1862         struct intel_uncore *uncore = gt->uncore;
1863         enum forcewake_domains fw;
1864         unsigned long flags;
1865         struct i915_wa *wa;
1866         unsigned int i;
1867
1868         if (!wal->count)
1869                 return;
1870
1871         fw = wal_get_fw_for_rmw(uncore, wal);
1872
1873         intel_gt_mcr_lock(gt, &flags);
1874         spin_lock(&uncore->lock);
1875         intel_uncore_forcewake_get__locked(uncore, fw);
1876
1877         for (i = 0, wa = wal->list; i < wal->count; i++, wa++) {
1878                 u32 val, old = 0;
1879
1880                 /* open-coded rmw due to steering */
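                /*
                 * For MCR registers the read goes through a single valid
                 * instance while the write is multicast to all instances,
                 * which a plain intel_uncore rmw helper cannot express.
                 */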
1881                 if (wa->clr)
1882                         old = wa->is_mcr ?
1883                                 intel_gt_mcr_read_any_fw(gt, wa->mcr_reg) :
1884                                 intel_uncore_read_fw(uncore, wa->reg);
1885                 val = (old & ~wa->clr) | wa->set;
1886                 if (val != old || !wa->clr) {
1887                         if (wa->is_mcr)
1888                                 intel_gt_mcr_multicast_write_fw(gt, wa->mcr_reg, val);
1889                         else
1890                                 intel_uncore_write_fw(uncore, wa->reg, val);
1891                 }
1892
1893                 if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) {
1894                         u32 val = wa->is_mcr ?
1895                                 intel_gt_mcr_read_any_fw(gt, wa->mcr_reg) :
1896                                 intel_uncore_read_fw(uncore, wa->reg);
1897
1898                         wa_verify(gt, wa, val, wal->name, "application");
1899                 }
1900         }
1901
1902         intel_uncore_forcewake_put__locked(uncore, fw);
1903         spin_unlock(&uncore->lock);
1904         intel_gt_mcr_unlock(gt, flags);
1905 }
1906
1907 void intel_gt_apply_workarounds(struct intel_gt *gt)
1908 {
1909         wa_list_apply(&gt->wa_list);
1910 }
1911
1912 static bool wa_list_verify(struct intel_gt *gt,
1913                            const struct i915_wa_list *wal,
1914                            const char *from)
1915 {
1916         struct intel_uncore *uncore = gt->uncore;
1917         struct i915_wa *wa;
1918         enum forcewake_domains fw;
1919         unsigned long flags;
1920         unsigned int i;
1921         bool ok = true;
1922
1923         fw = wal_get_fw_for_rmw(uncore, wal);
1924
1925         intel_gt_mcr_lock(gt, &flags);
1926         spin_lock(&uncore->lock);
1927         intel_uncore_forcewake_get__locked(uncore, fw);
1928
1929         for (i = 0, wa = wal->list; i < wal->count; i++, wa++)
1930                 ok &= wa_verify(wal->gt, wa, wa->is_mcr ?
1931                                 intel_gt_mcr_read_any_fw(gt, wa->mcr_reg) :
1932                                 intel_uncore_read_fw(uncore, wa->reg),
1933                                 wal->name, from);
1934
1935         intel_uncore_forcewake_put__locked(uncore, fw);
1936         spin_unlock(&uncore->lock);
1937         intel_gt_mcr_unlock(gt, flags);
1938
1939         return ok;
1940 }
1941
1942 bool intel_gt_verify_workarounds(struct intel_gt *gt, const char *from)
1943 {
1944         return wa_list_verify(gt, &gt->wa_list, from);
1945 }
1946
1947 __maybe_unused
1948 static bool is_nonpriv_flags_valid(u32 flags)
1949 {
1950         /* Check only valid flag bits are set */
1951         if (flags & ~RING_FORCE_TO_NONPRIV_MASK_VALID)
1952                 return false;
1953
1954         /* NB: Only 3 out of 4 enum values are valid for access field */
1955         if ((flags & RING_FORCE_TO_NONPRIV_ACCESS_MASK) ==
1956             RING_FORCE_TO_NONPRIV_ACCESS_INVALID)
1957                 return false;
1958
1959         return true;
1960 }
1961
1962 static void
1963 whitelist_reg_ext(struct i915_wa_list *wal, i915_reg_t reg, u32 flags)
1964 {
1965         struct i915_wa wa = {
1966                 .reg = reg
1967         };
1968
1969         if (GEM_DEBUG_WARN_ON(wal->count >= RING_MAX_NONPRIV_SLOTS))
1970                 return;
1971
1972         if (GEM_DEBUG_WARN_ON(!is_nonpriv_flags_valid(flags)))
1973                 return;
1974
1975         wa.reg.reg |= flags;
1976         _wa_add(wal, &wa);
1977 }
1978
1979 static void
1980 whitelist_mcr_reg_ext(struct i915_wa_list *wal, i915_mcr_reg_t reg, u32 flags)
1981 {
1982         struct i915_wa wa = {
1983                 .mcr_reg = reg,
1984                 .is_mcr = 1,
1985         };
1986
1987         if (GEM_DEBUG_WARN_ON(wal->count >= RING_MAX_NONPRIV_SLOTS))
1988                 return;
1989
1990         if (GEM_DEBUG_WARN_ON(!is_nonpriv_flags_valid(flags)))
1991                 return;
1992
1993         wa.mcr_reg.reg |= flags;
1994         _wa_add(wal, &wa);
1995 }
1996
1997 static void
1998 whitelist_reg(struct i915_wa_list *wal, i915_reg_t reg)
1999 {
2000         whitelist_reg_ext(wal, reg, RING_FORCE_TO_NONPRIV_ACCESS_RW);
2001 }
2002
2003 static void
2004 whitelist_mcr_reg(struct i915_wa_list *wal, i915_mcr_reg_t reg)
2005 {
2006         whitelist_mcr_reg_ext(wal, reg, RING_FORCE_TO_NONPRIV_ACCESS_RW);
2007 }
2008
2009 static void gen9_whitelist_build(struct i915_wa_list *w)
2010 {
2011         /* WaVFEStateAfterPipeControlwithMediaStateClear:skl,bxt,glk,cfl */
2012         whitelist_reg(w, GEN9_CTX_PREEMPT_REG);
2013
2014         /* WaEnablePreemptionGranularityControlByUMD:skl,bxt,kbl,cfl,[cnl] */
2015         whitelist_reg(w, GEN8_CS_CHICKEN1);
2016
2017         /* WaAllowUMDToModifyHDCChicken1:skl,bxt,kbl,glk,cfl */
2018         whitelist_reg(w, GEN8_HDC_CHICKEN1);
2019
2020         /* WaSendPushConstantsFromMMIO:skl,bxt */
2021         whitelist_reg(w, COMMON_SLICE_CHICKEN2);
2022 }
2023
2024 static void skl_whitelist_build(struct intel_engine_cs *engine)
2025 {
2026         struct i915_wa_list *w = &engine->whitelist;
2027
2028         if (engine->class != RENDER_CLASS)
2029                 return;
2030
2031         gen9_whitelist_build(w);
2032
2033         /* WaDisableLSQCROPERFforOCL:skl */
2034         whitelist_mcr_reg(w, GEN8_L3SQCREG4);
2035 }
2036
2037 static void bxt_whitelist_build(struct intel_engine_cs *engine)
2038 {
2039         if (engine->class != RENDER_CLASS)
2040                 return;
2041
2042         gen9_whitelist_build(&engine->whitelist);
2043 }
2044
2045 static void kbl_whitelist_build(struct intel_engine_cs *engine)
2046 {
2047         struct i915_wa_list *w = &engine->whitelist;
2048
2049         if (engine->class != RENDER_CLASS)
2050                 return;
2051
2052         gen9_whitelist_build(w);
2053
2054         /* WaDisableLSQCROPERFforOCL:kbl */
2055         whitelist_mcr_reg(w, GEN8_L3SQCREG4);
2056 }
2057
2058 static void glk_whitelist_build(struct intel_engine_cs *engine)
2059 {
2060         struct i915_wa_list *w = &engine->whitelist;
2061
2062         if (engine->class != RENDER_CLASS)
2063                 return;
2064
2065         gen9_whitelist_build(w);
2066
2067         /* WA #0862: Userspace has to set "Barrier Mode" to avoid hangs. */
2068         whitelist_reg(w, GEN9_SLICE_COMMON_ECO_CHICKEN1);
2069 }
2070
2071 static void cfl_whitelist_build(struct intel_engine_cs *engine)
2072 {
2073         struct i915_wa_list *w = &engine->whitelist;
2074
2075         if (engine->class != RENDER_CLASS)
2076                 return;
2077
2078         gen9_whitelist_build(w);
2079
2080         /*
2081          * WaAllowPMDepthAndInvocationCountAccessFromUMD:cfl,whl,cml,aml
2082          *
2083          * This covers 4 registers which are next to one another:
2084          *   - PS_INVOCATION_COUNT
2085          *   - PS_INVOCATION_COUNT_UDW
2086          *   - PS_DEPTH_COUNT
2087          *   - PS_DEPTH_COUNT_UDW
2088          */
2089         whitelist_reg_ext(w, PS_INVOCATION_COUNT,
2090                           RING_FORCE_TO_NONPRIV_ACCESS_RD |
2091                           RING_FORCE_TO_NONPRIV_RANGE_4);
2092 }
2093
2094 static void allow_read_ctx_timestamp(struct intel_engine_cs *engine)
2095 {
2096         struct i915_wa_list *w = &engine->whitelist;
2097
2098         if (engine->class != RENDER_CLASS)
2099                 whitelist_reg_ext(w,
2100                                   RING_CTX_TIMESTAMP(engine->mmio_base),
2101                                   RING_FORCE_TO_NONPRIV_ACCESS_RD);
2102 }
2103
2104 static void cml_whitelist_build(struct intel_engine_cs *engine)
2105 {
2106         allow_read_ctx_timestamp(engine);
2107
2108         cfl_whitelist_build(engine);
2109 }
2110
2111 static void icl_whitelist_build(struct intel_engine_cs *engine)
2112 {
2113         struct i915_wa_list *w = &engine->whitelist;
2114
2115         allow_read_ctx_timestamp(engine);
2116
2117         switch (engine->class) {
2118         case RENDER_CLASS:
2119                 /* WaAllowUMDToModifyHalfSliceChicken7:icl */
2120                 whitelist_mcr_reg(w, GEN9_HALF_SLICE_CHICKEN7);
2121
2122                 /* WaAllowUMDToModifySamplerMode:icl */
2123                 whitelist_mcr_reg(w, GEN10_SAMPLER_MODE);
2124
2125                 /* WaEnableStateCacheRedirectToCS:icl */
2126                 whitelist_reg(w, GEN9_SLICE_COMMON_ECO_CHICKEN1);
2127
2128                 /*
2129                  * WaAllowPMDepthAndInvocationCountAccessFromUMD:icl
2130                  *
2131                  * This covers 4 registers which are next to one another:
2132                  *   - PS_INVOCATION_COUNT
2133                  *   - PS_INVOCATION_COUNT_UDW
2134                  *   - PS_DEPTH_COUNT
2135                  *   - PS_DEPTH_COUNT_UDW
2136                  */
2137                 whitelist_reg_ext(w, PS_INVOCATION_COUNT,
2138                                   RING_FORCE_TO_NONPRIV_ACCESS_RD |
2139                                   RING_FORCE_TO_NONPRIV_RANGE_4);
2140                 break;
2141
2142         case VIDEO_DECODE_CLASS:
2143                 /* hucStatusRegOffset */
2144                 whitelist_reg_ext(w, _MMIO(0x2000 + engine->mmio_base),
2145                                   RING_FORCE_TO_NONPRIV_ACCESS_RD);
2146                 /* hucUKernelHdrInfoRegOffset */
2147                 whitelist_reg_ext(w, _MMIO(0x2014 + engine->mmio_base),
2148                                   RING_FORCE_TO_NONPRIV_ACCESS_RD);
2149                 /* hucStatus2RegOffset */
2150                 whitelist_reg_ext(w, _MMIO(0x23B0 + engine->mmio_base),
2151                                   RING_FORCE_TO_NONPRIV_ACCESS_RD);
2152                 break;
2153
2154         default:
2155                 break;
2156         }
2157 }
2158
2159 static void tgl_whitelist_build(struct intel_engine_cs *engine)
2160 {
2161         struct i915_wa_list *w = &engine->whitelist;
2162
2163         allow_read_ctx_timestamp(engine);
2164
2165         switch (engine->class) {
2166         case RENDER_CLASS:
2167                 /*
2168                  * WaAllowPMDepthAndInvocationCountAccessFromUMD:tgl
2169                  * Wa_1408556865:tgl
2170                  *
2171                  * This covers 4 registers which are next to one another:
2172                  *   - PS_INVOCATION_COUNT
2173                  *   - PS_INVOCATION_COUNT_UDW
2174                  *   - PS_DEPTH_COUNT
2175                  *   - PS_DEPTH_COUNT_UDW
2176                  */
2177                 whitelist_reg_ext(w, PS_INVOCATION_COUNT,
2178                                   RING_FORCE_TO_NONPRIV_ACCESS_RD |
2179                                   RING_FORCE_TO_NONPRIV_RANGE_4);
2180
2181                 /*
2182                  * Wa_1808121037:tgl
2183                  * Wa_14012131227:dg1
2184                  * Wa_1508744258:tgl,rkl,dg1,adl-s,adl-p
2185                  */
2186                 whitelist_reg(w, GEN7_COMMON_SLICE_CHICKEN1);
2187
2188                 /* Wa_1806527549:tgl */
2189                 whitelist_reg(w, HIZ_CHICKEN);
2190
2191                 /* Required by recommended tuning setting (not a workaround) */
2192                 whitelist_reg(w, GEN11_COMMON_SLICE_CHICKEN3);
2193
2194                 break;
2195         default:
2196                 break;
2197         }
2198 }
2199
2200 static void dg2_whitelist_build(struct intel_engine_cs *engine)
2201 {
2202         struct i915_wa_list *w = &engine->whitelist;
2203
2204         switch (engine->class) {
2205         case RENDER_CLASS:
2206                 /*
2207                  * Wa_1507100340:dg2_g10
2208                  *
2209                  * This covers 4 registers which are next to one another:
2210                  *   - PS_INVOCATION_COUNT
2211                  *   - PS_INVOCATION_COUNT_UDW
2212                  *   - PS_DEPTH_COUNT
2213                  *   - PS_DEPTH_COUNT_UDW
2214                  */
2215                 if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_A0, STEP_B0))
2216                         whitelist_reg_ext(w, PS_INVOCATION_COUNT,
2217                                           RING_FORCE_TO_NONPRIV_ACCESS_RD |
2218                                           RING_FORCE_TO_NONPRIV_RANGE_4);
2219
2220                 /* Required by recommended tuning setting (not a workaround) */
2221                 whitelist_mcr_reg(w, XEHP_COMMON_SLICE_CHICKEN3);
2222
2223                 break;
2224         case COMPUTE_CLASS:
2225                 /* Wa_16011157294:dg2_g10 */
2226                 if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_A0, STEP_B0))
2227                         whitelist_reg(w, GEN9_CTX_PREEMPT_REG);
2228                 break;
2229         default:
2230                 break;
2231         }
2232 }
2233
2234 static void blacklist_trtt(struct intel_engine_cs *engine)
2235 {
2236         struct i915_wa_list *w = &engine->whitelist;
2237
2238         /*
2239          * Prevent read/write access to [0x4400, 0x4600) which covers
2240          * the TRTT range across all engines. Note that normally userspace
2241          * cannot access the other engines' trtt control, but for simplicity
2242          * we cover the entire range on each engine.
2243          */
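        /*
         * Each RANGE_64 entry spans 64 consecutive dwords (0x100 bytes of
         * MMIO), so the two entries below at 0x4400 and 0x4500 together deny
         * access to the full [0x4400, 0x4600) range.
         */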
2244         whitelist_reg_ext(w, _MMIO(0x4400),
2245                           RING_FORCE_TO_NONPRIV_DENY |
2246                           RING_FORCE_TO_NONPRIV_RANGE_64);
2247         whitelist_reg_ext(w, _MMIO(0x4500),
2248                           RING_FORCE_TO_NONPRIV_DENY |
2249                           RING_FORCE_TO_NONPRIV_RANGE_64);
2250 }
2251
2252 static void pvc_whitelist_build(struct intel_engine_cs *engine)
2253 {
2254         /* Wa_16014440446:pvc */
2255         blacklist_trtt(engine);
2256 }
2257
2258 static void mtl_whitelist_build(struct intel_engine_cs *engine)
2259 {
2260         struct i915_wa_list *w = &engine->whitelist;
2261
2262         switch (engine->class) {
2263         case RENDER_CLASS:
2264                 /* Required by recommended tuning setting (not a workaround) */
2265                 whitelist_mcr_reg(w, XEHP_COMMON_SLICE_CHICKEN3);
2266
2267                 break;
2268         default:
2269                 break;
2270         }
2271 }
2272
2273 void intel_engine_init_whitelist(struct intel_engine_cs *engine)
2274 {
2275         struct drm_i915_private *i915 = engine->i915;
2276         struct i915_wa_list *w = &engine->whitelist;
2277
2278         wa_init_start(w, engine->gt, "whitelist", engine->name);
2279
2280         if (IS_METEORLAKE(i915))
2281                 mtl_whitelist_build(engine);
2282         else if (IS_PONTEVECCHIO(i915))
2283                 pvc_whitelist_build(engine);
2284         else if (IS_DG2(i915))
2285                 dg2_whitelist_build(engine);
2286         else if (IS_XEHPSDV(i915))
2287                 ; /* none needed */
2288         else if (GRAPHICS_VER(i915) == 12)
2289                 tgl_whitelist_build(engine);
2290         else if (GRAPHICS_VER(i915) == 11)
2291                 icl_whitelist_build(engine);
2292         else if (IS_COMETLAKE(i915))
2293                 cml_whitelist_build(engine);
2294         else if (IS_COFFEELAKE(i915))
2295                 cfl_whitelist_build(engine);
2296         else if (IS_GEMINILAKE(i915))
2297                 glk_whitelist_build(engine);
2298         else if (IS_KABYLAKE(i915))
2299                 kbl_whitelist_build(engine);
2300         else if (IS_BROXTON(i915))
2301                 bxt_whitelist_build(engine);
2302         else if (IS_SKYLAKE(i915))
2303                 skl_whitelist_build(engine);
2304         else if (GRAPHICS_VER(i915) <= 8)
2305                 ;
2306         else
2307                 MISSING_CASE(GRAPHICS_VER(i915));
2308
2309         wa_init_finish(w);
2310 }
2311
2312 void intel_engine_apply_whitelist(struct intel_engine_cs *engine)
2313 {
2314         const struct i915_wa_list *wal = &engine->whitelist;
2315         struct intel_uncore *uncore = engine->uncore;
2316         const u32 base = engine->mmio_base;
2317         struct i915_wa *wa;
2318         unsigned int i;
2319
2320         if (!wal->count)
2321                 return;
2322
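        /* Program each whitelisted register offset into its own NONPRIV slot. */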
2323         for (i = 0, wa = wal->list; i < wal->count; i++, wa++)
2324                 intel_uncore_write(uncore,
2325                                    RING_FORCE_TO_NONPRIV(base, i),
2326                                    i915_mmio_reg_offset(wa->reg));
2327
2328         /* And clear the rest just in case of garbage */
2329         for (; i < RING_MAX_NONPRIV_SLOTS; i++)
2330                 intel_uncore_write(uncore,
2331                                    RING_FORCE_TO_NONPRIV(base, i),
2332                                    i915_mmio_reg_offset(RING_NOPID(base)));
2333 }
2334
2335 /*
2336  * engine_fake_wa_init() is a placeholder to program registers
2337  * which are not part of an official workaround defined by the
2338  * hardware team.
2339  * Adding the programming of those registers to a workaround list lets
2340  * us reuse the wa framework for proper application and verification.
2341  */
2342 static void
2343 engine_fake_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
2344 {
2345         u8 mocs_w, mocs_r;
2346
2347         /*
2348          * RING_CMD_CCTL specifies the default MOCS entry that will be used
2349          * by the command streamer when executing commands that don't have
2350          * a way to explicitly specify a MOCS setting.  The default should
2351          * usually reference whichever MOCS entry corresponds to uncached
2352          * behavior, although use of a WB cached entry is recommended by the
2353          * spec in certain circumstances on specific platforms.
2354          */
2355         if (GRAPHICS_VER(engine->i915) >= 12) {
2356                 mocs_r = engine->gt->mocs.uc_index;
2357                 mocs_w = engine->gt->mocs.uc_index;
2358
2359                 if (HAS_L3_CCS_READ(engine->i915) &&
2360                     engine->class == COMPUTE_CLASS) {
2361                         mocs_r = engine->gt->mocs.wb_index;
2362
2363                         /*
2364                          * Even on the few platforms where MOCS 0 is a
2365                          * legitimate table entry, it's never the correct
2366                          * setting to use here; we can assume the MOCS init
2367                          * just forgot to initialize wb_index.
2368                          */
2369                         drm_WARN_ON(&engine->i915->drm, mocs_r == 0);
2370                 }
2371
2372                 wa_masked_field_set(wal,
2373                                     RING_CMD_CCTL(engine->mmio_base),
2374                                     CMD_CCTL_MOCS_MASK,
2375                                     CMD_CCTL_MOCS_OVERRIDE(mocs_w, mocs_r));
2376         }
2377 }
2378
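/*
 * Wa_1308578152 is only needed when the first gslice is fused off, i.e. when
 * the first available DSS index already falls beyond gslice 0.
 */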
2379 static bool needs_wa_1308578152(struct intel_engine_cs *engine)
2380 {
2381         return intel_sseu_find_first_xehp_dss(&engine->gt->info.sseu, 0, 0) >=
2382                 GEN_DSS_PER_GSLICE;
2383 }
2384
2385 static void
2386 rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
2387 {
2388         struct drm_i915_private *i915 = engine->i915;
2389
2390         if (IS_MTL_GRAPHICS_STEP(i915, M, STEP_A0, STEP_B0) ||
2391             IS_MTL_GRAPHICS_STEP(i915, P, STEP_A0, STEP_B0)) {
2392                 /* Wa_22014600077 */
2393                 wa_mcr_masked_en(wal, GEN10_CACHE_MODE_SS,
2394                                  ENABLE_EU_COUNT_FOR_TDL_FLUSH);
2395         }
2396
2397         if (IS_MTL_GRAPHICS_STEP(i915, M, STEP_A0, STEP_B0) ||
2398             IS_MTL_GRAPHICS_STEP(i915, P, STEP_A0, STEP_B0) ||
2399             IS_DG2_GRAPHICS_STEP(i915, G10, STEP_B0, STEP_FOREVER) ||
2400             IS_DG2_G11(i915) || IS_DG2_G12(i915)) {
2401                 /* Wa_1509727124 */
2402                 wa_mcr_masked_en(wal, GEN10_SAMPLER_MODE,
2403                                  SC_DISABLE_POWER_OPTIMIZATION_EBB);
2404         }
2405
2406         if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_B0, STEP_FOREVER) ||
2407             IS_DG2_G11(i915) || IS_DG2_G12(i915) ||
2408             IS_MTL_GRAPHICS_STEP(i915, M, STEP_A0, STEP_B0)) {
2409                 /* Wa_22012856258 */
2410                 wa_mcr_masked_en(wal, GEN8_ROW_CHICKEN2,
2411                                  GEN12_DISABLE_READ_SUPPRESSION);
2412         }
2413
2414         if (IS_DG2_GRAPHICS_STEP(i915, G11, STEP_A0, STEP_B0)) {
2415                 /* Wa_14013392000:dg2_g11 */
2416                 wa_mcr_masked_en(wal, GEN8_ROW_CHICKEN2, GEN12_ENABLE_LARGE_GRF_MODE);
2417         }
2418
2419         if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_A0, STEP_B0) ||
2420             IS_DG2_GRAPHICS_STEP(i915, G11, STEP_A0, STEP_B0)) {
2421                 /* Wa_14012419201:dg2 */
2422                 wa_mcr_masked_en(wal, GEN9_ROW_CHICKEN4,
2423                                  GEN12_DISABLE_HDR_PAST_PAYLOAD_HOLD_FIX);
2424         }
2425
2426         /* Wa_1308578152:dg2_g10 when first gslice is fused off */
2427         if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_B0, STEP_C0) &&
2428             needs_wa_1308578152(engine)) {
2429                 wa_masked_dis(wal, GEN12_CS_DEBUG_MODE1_CCCSUNIT_BE_COMMON,
2430                               GEN12_REPLAY_MODE_GRANULARITY);
2431         }
2432
2433         if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_B0, STEP_FOREVER) ||
2434             IS_DG2_G11(i915) || IS_DG2_G12(i915)) {
2435                 /*
2436                  * Wa_22010960976:dg2
2437                  * Wa_14013347512:dg2
2438                  */
2439                 wa_mcr_masked_dis(wal, XEHP_HDC_CHICKEN0,
2440                                   LSC_L1_FLUSH_CTL_3D_DATAPORT_FLUSH_EVENTS_MASK);
2441         }
2442
2443         if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_A0, STEP_B0)) {
2444                 /*
2445                  * Wa_1608949956:dg2_g10
2446                  * Wa_14010198302:dg2_g10
2447                  */
2448                 wa_mcr_masked_en(wal, GEN8_ROW_CHICKEN,
2449                                  MDQ_ARBITRATION_MODE | UGM_BACKUP_MODE);
2450         }
2451
2452         if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_A0, STEP_B0))
2453                 /* Wa_22010430635:dg2 */
2454                 wa_mcr_masked_en(wal,
2455                                  GEN9_ROW_CHICKEN4,
2456                                  GEN12_DISABLE_GRF_CLEAR);
2457
2458         /* Wa_14013202645:dg2 */
2459         if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_B0, STEP_C0) ||
2460             IS_DG2_GRAPHICS_STEP(i915, G11, STEP_A0, STEP_B0))
2461                 wa_mcr_write_or(wal, RT_CTRL, DIS_NULL_QUERY);
2462
2463         /* Wa_22012532006:dg2 */
2464         if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_A0, STEP_C0) ||
2465             IS_DG2_GRAPHICS_STEP(engine->i915, G11, STEP_A0, STEP_B0))
2466                 wa_mcr_masked_en(wal, GEN9_HALF_SLICE_CHICKEN7,
2467                                  DG2_DISABLE_ROUND_ENABLE_ALLOW_FOR_SSLA);
2468
2469         if (IS_DG2_GRAPHICS_STEP(i915, G11, STEP_B0, STEP_FOREVER) ||
2470             IS_DG2_G10(i915)) {
2471                 /* Wa_22014600077:dg2 */
2472                 wa_mcr_add(wal, GEN10_CACHE_MODE_SS, 0,
2473                            _MASKED_BIT_ENABLE(ENABLE_EU_COUNT_FOR_TDL_FLUSH),
2474                            0 /* Wa_14012342262 write-only reg, so skip verification */,
2475                            true);
2476         }
2477
2478         if (IS_ALDERLAKE_P(i915) || IS_ALDERLAKE_S(i915) || IS_DG1(i915) ||
2479             IS_ROCKETLAKE(i915) || IS_TIGERLAKE(i915)) {
2480                 /* Wa_1606931601:tgl,rkl,dg1,adl-s,adl-p */
2481                 wa_mcr_masked_en(wal, GEN8_ROW_CHICKEN2, GEN12_DISABLE_EARLY_READ);
2482
2483                 /*
2484                  * Wa_1407928979:tgl A*
2485                  * Wa_18011464164:tgl[B0+],dg1[B0+]
2486                  * Wa_22010931296:tgl[B0+],dg1[B0+]
2487                  * Wa_14010919138:rkl,dg1,adl-s,adl-p
2488                  */
2489                 wa_write_or(wal, GEN7_FF_THREAD_MODE,
2490                             GEN12_FF_TESSELATION_DOP_GATE_DISABLE);
2491         }
2492
2493         if (IS_ALDERLAKE_P(i915) || IS_DG2(i915) || IS_ALDERLAKE_S(i915) ||
2494             IS_DG1(i915) || IS_ROCKETLAKE(i915) || IS_TIGERLAKE(i915)) {
2495                 /*
2496                  * Wa_1606700617:tgl,dg1,adl-p
2497                  * Wa_22010271021:tgl,rkl,dg1,adl-s,adl-p
2498                  * Wa_14010826681:tgl,dg1,rkl,adl-p
2499                  * Wa_18019627453:dg2
2500                  */
2501                 wa_masked_en(wal,
2502                              GEN9_CS_DEBUG_MODE1,
2503                              FF_DOP_CLOCK_GATE_DISABLE);
2504         }
2505
2506         if (IS_ALDERLAKE_P(i915) || IS_ALDERLAKE_S(i915) ||
2507             IS_ROCKETLAKE(i915) || IS_TIGERLAKE(i915)) {
2508                 /* Wa_1409804808 */
2509                 wa_mcr_masked_en(wal, GEN8_ROW_CHICKEN2,
2510                                  GEN12_PUSH_CONST_DEREF_HOLD_DIS);
2511
2512                 /* Wa_14010229206 */
2513                 wa_mcr_masked_en(wal, GEN9_ROW_CHICKEN4, GEN12_DISABLE_TDL_PUSH);
2514         }
2515
2516         if (IS_ROCKETLAKE(i915) || IS_TIGERLAKE(i915) || IS_ALDERLAKE_P(i915)) {
2517                 /*
2518                  * Wa_1607297627
2519                  *
2520                  * On TGL and RKL there are multiple entries for this WA in the
2521                  * BSpec; some indicate this is an A0-only WA, others indicate
2522                  * it applies to all steppings so we trust the "all steppings."
2523                  */
2524                 wa_masked_en(wal,
2525                              RING_PSMI_CTL(RENDER_RING_BASE),
2526                              GEN12_WAIT_FOR_EVENT_POWER_DOWN_DISABLE |
2527                              GEN8_RC_SEMA_IDLE_MSG_DISABLE);
2528         }
2529
2530         if (IS_DG1(i915) || IS_ROCKETLAKE(i915) || IS_TIGERLAKE(i915) ||
2531             IS_ALDERLAKE_S(i915) || IS_ALDERLAKE_P(i915)) {
2532                 /* Wa_1406941453:tgl,rkl,dg1,adl-s,adl-p */
2533                 wa_mcr_masked_en(wal,
2534                                  GEN10_SAMPLER_MODE,
2535                                  ENABLE_SMALLPL);
2536         }
2537
2538         if (GRAPHICS_VER(i915) == 11) {
2539                 /* This is not a Wa. Enable for better image quality */
2540                 wa_masked_en(wal,
2541                              _3D_CHICKEN3,
2542                              _3D_CHICKEN3_AA_LINE_QUALITY_FIX_ENABLE);
2543
2544                 /*
2545                  * Wa_1405543622:icl
2546                  * Formerly known as WaGAPZPriorityScheme
2547                  */
2548                 wa_write_or(wal,
2549                             GEN8_GARBCNTL,
2550                             GEN11_ARBITRATION_PRIO_ORDER_MASK);
2551
2552                 /*
2553                  * Wa_1604223664:icl
2554                  * Formerly known as WaL3BankAddressHashing
2555                  */
2556                 wa_write_clr_set(wal,
2557                                  GEN8_GARBCNTL,
2558                                  GEN11_HASH_CTRL_EXCL_MASK,
2559                                  GEN11_HASH_CTRL_EXCL_BIT0);
2560                 wa_write_clr_set(wal,
2561                                  GEN11_GLBLINVL,
2562                                  GEN11_BANK_HASH_ADDR_EXCL_MASK,
2563                                  GEN11_BANK_HASH_ADDR_EXCL_BIT0);
2564
2565                 /*
2566                  * Wa_1405733216:icl
2567                  * Formerly known as WaDisableCleanEvicts
2568                  */
2569                 wa_mcr_write_or(wal,
2570                                 GEN8_L3SQCREG4,
2571                                 GEN11_LQSC_CLEAN_EVICT_DISABLE);
2572
2573                 /* Wa_1606682166:icl */
2574                 wa_write_or(wal,
2575                             GEN7_SARCHKMD,
2576                             GEN7_DISABLE_SAMPLER_PREFETCH);
2577
2578                 /* Wa_1409178092:icl */
2579                 wa_mcr_write_clr_set(wal,
2580                                      GEN11_SCRATCH2,
2581                                      GEN11_COHERENT_PARTIAL_WRITE_MERGE_ENABLE,
2582                                      0);
2583
2584                 /* WaEnable32PlaneMode:icl */
2585                 wa_masked_en(wal, GEN9_CSFE_CHICKEN1_RCS,
2586                              GEN11_ENABLE_32_PLANE_MODE);
2587
2588                 /*
2589                  * Wa_1408767742:icl[a2..forever],ehl[all]
2590                  * Wa_1605460711:icl[a0..c0]
2591                  */
2592                 wa_write_or(wal,
2593                             GEN7_FF_THREAD_MODE,
2594                             GEN12_FF_TESSELATION_DOP_GATE_DISABLE);
2595
2596                 /* Wa_22010271021 */
2597                 wa_masked_en(wal,
2598                              GEN9_CS_DEBUG_MODE1,
2599                              FF_DOP_CLOCK_GATE_DISABLE);
2600         }
2601
2602         /*
2603          * Intel platforms that support fine-grained preemption (i.e., gen9 and
2604          * beyond) allow the kernel-mode driver to choose between two different
2605          * options for controlling preemption granularity and behavior.
2606          *
2607          * Option 1 (hardware default):
2608          *   Preemption settings are controlled in a global manner via
2609          *   kernel-only register CS_DEBUG_MODE1 (0x20EC).  Any granularity
2610          *   and settings chosen by the kernel-mode driver will apply to all
2611          *   userspace clients.
2612          *
2613          * Option 2:
2614          *   Preemption settings are controlled on a per-context basis via
2615          *   register CS_CHICKEN1 (0x2580).  CS_CHICKEN1 is saved/restored on
2616          *   context switch and is writable by userspace (e.g., via
2617          *   MI_LOAD_REGISTER_IMMEDIATE instructions placed in a batch buffer)
2618          *   which allows different userspace drivers/clients to select
2619          *   different settings, or to change those settings on the fly in
2620          *   response to runtime needs.  This option was known by name
2621          *   "FtrPerCtxtPreemptionGranularityControl" at one time, although
2622          *   that name is somewhat misleading as other non-granularity
2623          *   preemption settings are also impacted by this decision.
2624          *
2625          * On Linux, our policy has always been to let userspace drivers
2626          * control preemption granularity/settings (Option 2).  This was
2627          * originally mandatory on gen9 to prevent ABI breakage (old gen9
2628          * userspace developed before object-level preemption was enabled would
2629          * not behave well if i915 were to go with Option 1 and enable that
2630          * preemption in a global manner).  On gen9 each context would have
2631          * object-level preemption disabled by default (see
2632          * WaDisable3DMidCmdPreemption in gen9_ctx_workarounds_init), but
2633          * userspace drivers could opt-in to object-level preemption as they
2634          * saw fit.  For post-gen9 platforms, we continue to utilize Option 2;
2635          * even though it is no longer necessary for ABI compatibility when
2636          * enabling a new platform, it does ensure that userspace will be able
2637          * to implement any workarounds that show up requiring temporary
2638          * adjustments to preemption behavior at runtime.
2639          *
2640          * Notes/Workarounds:
2641          *  - Wa_14015141709:  On DG2 and early steppings of MTL,
2642          *      CS_CHICKEN1[0] does not disable object-level preemption as
2643          *      it is supposed to (nor does CS_DEBUG_MODE1[0] if we had been
2644          *      using Option 1).  Effectively this means userspace is unable
2645          *      to disable object-level preemption on these platforms/steppings
2646          *      despite the setting here.
2647          *
2648          *  - Wa_16013994831:  May require that userspace program
2649          *      CS_CHICKEN1[10] when certain runtime conditions are true.
2650          *      Userspace requires Option 2 to be in effect for their update of
2651          *      CS_CHICKEN1[10] to be effective.
2652          *
2653          * Other workarounds may appear in the future that will also require
2654          * Option 2 behavior to allow proper userspace implementation.
2655          */
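        /*
         * Purely illustrative (not something i915 emits itself): with
         * Option 2 in effect a userspace driver can adjust its own
         * preemption behaviour from a batch buffer roughly as
         *   MI_LOAD_REGISTER_IMM(1), <engine base> + 0x2580, <masked value>
         * relying on CS_CHICKEN1 being saved/restored with the context.
         */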
2656         if (GRAPHICS_VER(i915) >= 9)
2657                 wa_masked_en(wal,
2658                              GEN7_FF_SLICE_CS_CHICKEN1,
2659                              GEN9_FFSC_PERCTX_PREEMPT_CTRL);
2660
2661         if (IS_SKYLAKE(i915) ||
2662             IS_KABYLAKE(i915) ||
2663             IS_COFFEELAKE(i915) ||
2664             IS_COMETLAKE(i915)) {
2665                 /* WaEnableGapsTsvCreditFix:skl,kbl,cfl */
2666                 wa_write_or(wal,
2667                             GEN8_GARBCNTL,
2668                             GEN9_GAPS_TSV_CREDIT_DISABLE);
2669         }
2670
2671         if (IS_BROXTON(i915)) {
2672                 /* WaDisablePooledEuLoadBalancingFix:bxt */
2673                 wa_masked_en(wal,
2674                              FF_SLICE_CS_CHICKEN2,
2675                              GEN9_POOLED_EU_LOAD_BALANCING_FIX_DISABLE);
2676         }
2677
2678         if (GRAPHICS_VER(i915) == 9) {
2679                 /* WaContextSwitchWithConcurrentTLBInvalidate:skl,bxt,kbl,glk,cfl */
2680                 wa_masked_en(wal,
2681                              GEN9_CSFE_CHICKEN1_RCS,
2682                              GEN9_PREEMPT_GPGPU_SYNC_SWITCH_DISABLE);
2683
2684                 /* WaEnableLbsSlaRetryTimerDecrement:skl,bxt,kbl,glk,cfl */
2685                 wa_mcr_write_or(wal,
2686                                 BDW_SCRATCH1,
2687                                 GEN9_LBS_SLA_RETRY_TIMER_DECREMENT_ENABLE);
2688
2689                 /* WaProgramL3SqcReg1DefaultForPerf:bxt,glk */
2690                 if (IS_GEN9_LP(i915))
2691                         wa_mcr_write_clr_set(wal,
2692                                              GEN8_L3SQCREG1,
2693                                              L3_PRIO_CREDITS_MASK,
2694                                              L3_GENERAL_PRIO_CREDITS(62) |
2695                                              L3_HIGH_PRIO_CREDITS(2));
2696
2697                 /* WaOCLCoherentLineFlush:skl,bxt,kbl,cfl */
2698                 wa_mcr_write_or(wal,
2699                                 GEN8_L3SQCREG4,
2700                                 GEN8_LQSC_FLUSH_COHERENT_LINES);
2701
2702                 /* Disable atomics in L3 to prevent unrecoverable hangs */
2703                 wa_write_clr_set(wal, GEN9_SCRATCH_LNCF1,
2704                                  GEN9_LNCF_NONIA_COHERENT_ATOMICS_ENABLE, 0);
2705                 wa_mcr_write_clr_set(wal, GEN8_L3SQCREG4,
2706                                      GEN8_LQSQ_NONIA_COHERENT_ATOMICS_ENABLE, 0);
2707                 wa_mcr_write_clr_set(wal, GEN9_SCRATCH1,
2708                                      EVICTION_PERF_FIX_ENABLE, 0);
2709         }
2710
2711         if (IS_HASWELL(i915)) {
2712                 /* WaSampleCChickenBitEnable:hsw */
2713                 wa_masked_en(wal,
2714                              HSW_HALF_SLICE_CHICKEN3, HSW_SAMPLE_C_PERFORMANCE);
2715
2716                 wa_masked_dis(wal,
2717                               CACHE_MODE_0_GEN7,
2718                               /* enable HiZ Raw Stall Optimization */
2719                               HIZ_RAW_STALL_OPT_DISABLE);
2720         }
2721
2722         if (IS_VALLEYVIEW(i915)) {
2723                 /* WaDisableEarlyCull:vlv */
2724                 wa_masked_en(wal,
2725                              _3D_CHICKEN3,
2726                              _3D_CHICKEN_SF_DISABLE_OBJEND_CULL);
2727
2728                 /*
2729                  * WaVSThreadDispatchOverride:ivb,vlv
2730                  *
2731                  * This actually overrides the dispatch
2732                  * mode for all thread types.
2733                  */
2734                 wa_write_clr_set(wal,
2735                                  GEN7_FF_THREAD_MODE,
2736                                  GEN7_FF_SCHED_MASK,
2737                                  GEN7_FF_TS_SCHED_HW |
2738                                  GEN7_FF_VS_SCHED_HW |
2739                                  GEN7_FF_DS_SCHED_HW);
2740
2741                 /* WaPsdDispatchEnable:vlv */
2742                 /* WaDisablePSDDualDispatchEnable:vlv */
2743                 wa_masked_en(wal,
2744                              GEN7_HALF_SLICE_CHICKEN1,
2745                              GEN7_MAX_PS_THREAD_DEP |
2746                              GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE);
2747         }
2748
2749         if (IS_IVYBRIDGE(i915)) {
2750                 /* WaDisableEarlyCull:ivb */
2751                 wa_masked_en(wal,
2752                              _3D_CHICKEN3,
2753                              _3D_CHICKEN_SF_DISABLE_OBJEND_CULL);
2754
2755                 if (0) { /* causes HiZ corruption on ivb:gt1 */
2756                         /* enable HiZ Raw Stall Optimization */
2757                         wa_masked_dis(wal,
2758                                       CACHE_MODE_0_GEN7,
2759                                       HIZ_RAW_STALL_OPT_DISABLE);
2760                 }
2761
2762                 /*
2763                  * WaVSThreadDispatchOverride:ivb,vlv
2764                  *
2765                  * This actually overrides the dispatch
2766                  * mode for all thread types.
2767                  */
2768                 wa_write_clr_set(wal,
2769                                  GEN7_FF_THREAD_MODE,
2770                                  GEN7_FF_SCHED_MASK,
2771                                  GEN7_FF_TS_SCHED_HW |
2772                                  GEN7_FF_VS_SCHED_HW |
2773                                  GEN7_FF_DS_SCHED_HW);
2774
2775                 /* WaDisablePSDDualDispatchEnable:ivb */
2776                 if (IS_IVB_GT1(i915))
2777                         wa_masked_en(wal,
2778                                      GEN7_HALF_SLICE_CHICKEN1,
2779                                      GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE);
2780         }
2781
2782         if (GRAPHICS_VER(i915) == 7) {
2783                 /* WaBCSVCSTlbInvalidationMode:ivb,vlv,hsw */
2784                 wa_masked_en(wal,
2785                              RING_MODE_GEN7(RENDER_RING_BASE),
2786                              GFX_TLB_INVALIDATE_EXPLICIT | GFX_REPLAY_MODE);
2787
2788                 /* WaDisable_RenderCache_OperationalFlush:ivb,vlv,hsw */
2789                 wa_masked_dis(wal, CACHE_MODE_0_GEN7, RC_OP_FLUSH_ENABLE);
2790
2791                 /*
2792                  * BSpec says this must be set, even though
2793                  * WaDisable4x2SubspanOptimization:ivb,hsw
2794                  * WaDisable4x2SubspanOptimization isn't listed for VLV.
2795                  */
2796                 wa_masked_en(wal,
2797                              CACHE_MODE_1,
2798                              PIXEL_SUBSPAN_COLLECT_OPT_DISABLE);
2799
2800                 /*
2801                  * BSpec recommends 8x4 when MSAA is used,
2802                  * however in practice 16x4 seems fastest.
2803                  *
2804                  * Note that PS/WM thread counts depend on the WIZ hashing
2805                  * disable bit, which we don't touch here, but it's good
2806                  * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
2807                  */
2808                 wa_masked_field_set(wal,
2809                                     GEN7_GT_MODE,
2810                                     GEN6_WIZ_HASHING_MASK,
2811                                     GEN6_WIZ_HASHING_16x4);
2812         }
2813
2814         if (IS_GRAPHICS_VER(i915, 6, 7))
2815                 /*
2816                  * We need to disable the AsyncFlip performance optimisations in
2817                  * order to use MI_WAIT_FOR_EVENT within the CS. It should
2818                  * already be programmed to '1' on all products.
2819                  *
2820                  * WaDisableAsyncFlipPerfMode:snb,ivb,hsw,vlv
2821                  */
2822                 wa_masked_en(wal,
2823                              RING_MI_MODE(RENDER_RING_BASE),
2824                              ASYNC_FLIP_PERF_DISABLE);
2825
2826         if (GRAPHICS_VER(i915) == 6) {
2827                 /*
2828                  * Required for the hardware to program scanline values for
2829                  * waiting
2830                  * WaEnableFlushTlbInvalidationMode:snb
2831                  */
2832                 wa_masked_en(wal,
2833                              GFX_MODE,
2834                              GFX_TLB_INVALIDATE_EXPLICIT);
2835
2836                 /* WaDisableHiZPlanesWhenMSAAEnabled:snb */
2837                 wa_masked_en(wal,
2838                              _3D_CHICKEN,
2839                              _3D_CHICKEN_HIZ_PLANE_DISABLE_MSAA_4X_SNB);
2840
2841                 wa_masked_en(wal,
2842                              _3D_CHICKEN3,
2843                              /* WaStripsFansDisableFastClipPerformanceFix:snb */
2844                              _3D_CHICKEN3_SF_DISABLE_FASTCLIP_CULL |
2845                              /*
2846                               * Bspec says:
2847                               * "This bit must be set if 3DSTATE_CLIP clip mode is set
2848                               * to normal and 3DSTATE_SF number of SF output attributes
2849                               * is more than 16."
2850                               */
2851                              _3D_CHICKEN3_SF_DISABLE_PIPELINED_ATTR_FETCH);
2852
2853                 /*
2854                  * BSpec recommends 8x4 when MSAA is used,
2855                  * however in practice 16x4 seems fastest.
2856                  *
2857                  * Note that PS/WM thread counts depend on the WIZ hashing
2858                  * disable bit, which we don't touch here, but it's good
2859                  * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
2860                  */
2861                 wa_masked_field_set(wal,
2862                                     GEN6_GT_MODE,
2863                                     GEN6_WIZ_HASHING_MASK,
2864                                     GEN6_WIZ_HASHING_16x4);
2865
2866                 /* WaDisable_RenderCache_OperationalFlush:snb */
2867                 wa_masked_dis(wal, CACHE_MODE_0, RC_OP_FLUSH_ENABLE);
2868
2869                 /*
2870                  * From the Sandybridge PRM, volume 1 part 3, page 24:
2871                  * "If this bit is set, STCunit will have LRA as replacement
2872                  *  policy. [...] This bit must be reset. LRA replacement
2873                  *  policy is not supported."
2874                  */
2875                 wa_masked_dis(wal,
2876                               CACHE_MODE_0,
2877                               CM0_STC_EVICT_DISABLE_LRA_SNB);
2878         }
2879
2880         if (IS_GRAPHICS_VER(i915, 4, 6))
2881                 /* WaTimedSingleVertexDispatch:cl,bw,ctg,elk,ilk,snb */
2882                 wa_add(wal, RING_MI_MODE(RENDER_RING_BASE),
2883                        0, _MASKED_BIT_ENABLE(VS_TIMER_DISPATCH),
2884                        /* XXX bit doesn't stick on Broadwater */
2885                        IS_I965G(i915) ? 0 : VS_TIMER_DISPATCH, true);
2886
2887         if (GRAPHICS_VER(i915) == 4)
2888                 /*
2889                  * Disable CONSTANT_BUFFER before it is loaded from the context
2890                  * image. For as soon as it is loaded, it is executed and the stored
2891                  * address may no longer be valid, leading to a GPU hang.
2892                  *
2893                  * This imposes the requirement that userspace reload their
2894                  * CONSTANT_BUFFER on every batch, fortunately a requirement
2895                  * they are already accustomed to from before contexts were
2896                  * enabled.
2897                  */
2898                 wa_add(wal, ECOSKPD(RENDER_RING_BASE),
2899                        0, _MASKED_BIT_ENABLE(ECO_CONSTANT_BUFFER_SR_DISABLE),
2900                        0 /* XXX bit doesn't stick on Broadwater */,
2901                        true);
2902 }
2903
2904 static void
2905 xcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
2906 {
2907         struct drm_i915_private *i915 = engine->i915;
2908
2909         /* WaKBLVECSSemaphoreWaitPoll:kbl */
2910         if (IS_KBL_GRAPHICS_STEP(i915, STEP_A0, STEP_F0)) {
2911                 wa_write(wal,
2912                          RING_SEMA_WAIT_POLL(engine->mmio_base),
2913                          1);
2914         }
2915 }
2916
2917 static void
2918 ccs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
2919 {
2920         if (IS_PVC_CT_STEP(engine->i915, STEP_A0, STEP_C0)) {
2921                 /* Wa_14014999345:pvc */
2922                 wa_mcr_masked_en(wal, GEN10_CACHE_MODE_SS, DISABLE_ECC);
2923         }
2924 }
2925
2926 /*
2927  * The bspec performance guide has recommended MMIO tuning settings.  These
2928  * aren't truly "workarounds" but we want to program them with the same
2929  * workaround infrastructure to ensure that they're automatically added to
2930  * the GuC save/restore lists, re-applied at the right times, and checked for
2931  * any conflicting programming requested by real workarounds.
2932  *
2933  * Programming settings should be added here only if their registers are not
2934  * part of an engine's register state context.  If a register is part of a
2935  * context, then any tuning settings should be programmed in an appropriate
2936  * function invoked by __intel_engine_init_ctx_wa().
2937  */
2938 static void
2939 add_render_compute_tuning_settings(struct drm_i915_private *i915,
2940                                    struct i915_wa_list *wal)
2941 {
2942         if (IS_DG2(i915))
2943                 wa_mcr_write_clr_set(wal, RT_CTRL, STACKID_CTRL, STACKID_CTRL_512);
2944
2945         /*
2946          * This tuning setting proves beneficial only on ATS-M designs; the
2947          * default "age based" setting is optimal on regular DG2 and other
2948          * platforms.
2949          */
2950         if (INTEL_INFO(i915)->tuning_thread_rr_after_dep)
2951                 wa_mcr_masked_field_set(wal, GEN9_ROW_CHICKEN4, THREAD_EX_ARB_MODE,
2952                                         THREAD_EX_ARB_MODE_RR_AFTER_DEP);
2953
2954         if (GRAPHICS_VER(i915) == 12 && GRAPHICS_VER_FULL(i915) < IP_VER(12, 50))
2955                 wa_write_clr(wal, GEN8_GARBCNTL, GEN12_BUS_HASH_CTL_BIT_EXC);
2956 }
2957
2958 /*
2959  * The workarounds in this function apply to shared registers in
2960  * the general render reset domain that aren't tied to a
2961  * specific engine.  Since all render+compute engines get reset
2962  * together, and the contents of these registers are lost during
2963  * the shared render domain reset, we'll define such workarounds
2964  * here and then add them to just a single RCS or CCS engine's
2965  * workaround list (whichever engine has the I915_ENGINE_FIRST_RENDER_COMPUTE flag).
2966  */
2967 static void
2968 general_render_compute_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
2969 {
2970         struct drm_i915_private *i915 = engine->i915;
2971
2972         add_render_compute_tuning_settings(i915, wal);
2973
2974         if (IS_MTL_GRAPHICS_STEP(i915, M, STEP_A0, STEP_B0) ||
2975             IS_MTL_GRAPHICS_STEP(i915, P, STEP_A0, STEP_B0) ||
2976             IS_DG2_GRAPHICS_STEP(i915, G10, STEP_B0, STEP_FOREVER) ||
2977             IS_DG2_G11(i915) || IS_DG2_G12(i915)) {
2978                 /* Wa_22013037850 */
2979                 wa_mcr_write_or(wal, LSC_CHICKEN_BIT_0_UDW,
2980                                 DISABLE_128B_EVICTION_COMMAND_UDW);
2981         }
2982
2983         if (IS_MTL_GRAPHICS_STEP(i915, M, STEP_A0, STEP_B0) ||
2984             IS_MTL_GRAPHICS_STEP(i915, P, STEP_A0, STEP_B0) ||
2985             IS_PONTEVECCHIO(i915) ||
2986             IS_DG2(i915)) {
2987                 /* Wa_22014226127 */
2988                 wa_mcr_write_or(wal, LSC_CHICKEN_BIT_0, DISABLE_D8_D16_COASLESCE);
2989         }
2990
2991         if (IS_MTL_GRAPHICS_STEP(i915, M, STEP_A0, STEP_B0) ||
2992             IS_MTL_GRAPHICS_STEP(i915, P, STEP_A0, STEP_B0) ||
2993             IS_DG2(i915)) {
2994                 /* Wa_18017747507 */
2995                 wa_masked_en(wal, VFG_PREEMPTION_CHICKEN, POLYGON_TRIFAN_LINELOOP_DISABLE);
2996         }
2997
2998         if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_B0, STEP_C0) ||
2999             IS_DG2_G11(i915)) {
3000                 /*
3001                  * Wa_22012826095:dg2
3002                  * Wa_22013059131:dg2
3003                  */
3004                 wa_mcr_write_clr_set(wal, LSC_CHICKEN_BIT_0_UDW,
3005                                      MAXREQS_PER_BANK,
3006                                      REG_FIELD_PREP(MAXREQS_PER_BANK, 2));
3007
3008                 /* Wa_22013059131:dg2 */
3009                 wa_mcr_write_or(wal, LSC_CHICKEN_BIT_0,
3010                                 FORCE_1_SUB_MESSAGE_PER_FRAGMENT);
3011         }
3012
3013         if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_A0, STEP_B0)) {
3014                 /*
3015                  * Wa_14010918519:dg2_g10
3016                  *
3017                  * LSC_CHICKEN_BIT_0 always reads back as 0 on this stepping,
3018                  * so verification is skipped.
3019                  */
3020                 wa_mcr_add(wal, LSC_CHICKEN_BIT_0_UDW, 0,
3021                            FORCE_SLM_FENCE_SCOPE_TO_TILE | FORCE_UGM_FENCE_SCOPE_TO_TILE,
3022                            0, false);
3023         }
3024
3025         if (IS_XEHPSDV(i915)) {
3026                 /* Wa_1409954639 */
3027                 wa_mcr_masked_en(wal,
3028                                  GEN8_ROW_CHICKEN,
3029                                  SYSTOLIC_DOP_CLOCK_GATING_DIS);
3030
3031                 /* Wa_1607196519 */
3032                 wa_mcr_masked_en(wal,
3033                                  GEN9_ROW_CHICKEN4,
3034                                  GEN12_DISABLE_GRF_CLEAR);
3035
3036                 /* Wa_14010449647:xehpsdv */
3037                 wa_mcr_masked_en(wal, GEN8_HALF_SLICE_CHICKEN1,
3038                                  GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE);
3039         }
3040
3041         if (IS_DG2(i915) || IS_PONTEVECCHIO(i915)) {
3042                 /* Wa_14015227452:dg2,pvc */
3043                 wa_mcr_masked_en(wal, GEN9_ROW_CHICKEN4, XEHP_DIS_BBL_SYSPIPE);
3044
3045                 /* Wa_16015675438:dg2,pvc */
3046                 wa_masked_en(wal, FF_SLICE_CS_CHICKEN2, GEN12_PERF_FIX_BALANCING_CFE_DISABLE);
3047         }
3048
3049         if (IS_DG2(i915)) {
3050                 /*
3051                  * Wa_16011620976:dg2_g11
3052                  * Wa_22015475538:dg2
3053                  */
3054                 wa_mcr_write_or(wal, LSC_CHICKEN_BIT_0_UDW, DIS_CHAIN_2XSIMD8);
3055         }
3056
3057         if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_A0, STEP_C0) || IS_DG2_G11(i915))
3058                 /*
3059                  * Wa_22012654132
3060                  *
3061                  * Note that register 0xE420 is write-only and cannot be read
3062                  * back for verification on DG2 (due to Wa_14012342262), so
3063                  * we need to explicitly skip the readback.
3064                  */
3065                 wa_mcr_add(wal, GEN10_CACHE_MODE_SS, 0,
3066                            _MASKED_BIT_ENABLE(ENABLE_PREFETCH_INTO_IC),
3067                            0 /* write-only, so skip validation */,
3068                            true);
3069 }
3070
3071 static void
3072 engine_init_workarounds(struct intel_engine_cs *engine, struct i915_wa_list *wal)
3073 {
3074         if (GRAPHICS_VER(engine->i915) < 4)
3075                 return;
3076
3077         engine_fake_wa_init(engine, wal);
3078
3079         /*
3080          * These are common workarounds that just need to be applied
3081          * to a single RCS/CCS engine's workaround list since
3082          * they're reset as part of the general render domain reset.
3083          */
3084         if (engine->flags & I915_ENGINE_FIRST_RENDER_COMPUTE)
3085                 general_render_compute_wa_init(engine, wal);
3086
3087         if (engine->class == COMPUTE_CLASS)
3088                 ccs_engine_wa_init(engine, wal);
3089         else if (engine->class == RENDER_CLASS)
3090                 rcs_engine_wa_init(engine, wal);
3091         else
3092                 xcs_engine_wa_init(engine, wal);
3093 }
3094
3095 void intel_engine_init_workarounds(struct intel_engine_cs *engine)
3096 {
3097         struct i915_wa_list *wal = &engine->wa_list;
3098
3099         wa_init_start(wal, engine->gt, "engine", engine->name);
3100         engine_init_workarounds(engine, wal);
3101         wa_init_finish(wal);
3102 }
3103
3104 void intel_engine_apply_workarounds(struct intel_engine_cs *engine)
3105 {
3106         wa_list_apply(&engine->wa_list);
3107 }
3108
3109 static const struct i915_range mcr_ranges_gen8[] = {
3110         { .start = 0x5500, .end = 0x55ff },
3111         { .start = 0x7000, .end = 0x7fff },
3112         { .start = 0x9400, .end = 0x97ff },
3113         { .start = 0xb000, .end = 0xb3ff },
3114         { .start = 0xe000, .end = 0xe7ff },
3115         {},
3116 };
3117
3118 static const struct i915_range mcr_ranges_gen12[] = {
3119         { .start =  0x8150, .end =  0x815f },
3120         { .start =  0x9520, .end =  0x955f },
3121         { .start =  0xb100, .end =  0xb3ff },
3122         { .start =  0xde80, .end =  0xe8ff },
3123         { .start = 0x24a00, .end = 0x24a7f },
3124         {},
3125 };
3126
3127 static const struct i915_range mcr_ranges_xehp[] = {
3128         { .start =  0x4000, .end =  0x4aff },
3129         { .start =  0x5200, .end =  0x52ff },
3130         { .start =  0x5400, .end =  0x7fff },
3131         { .start =  0x8140, .end =  0x815f },
3132         { .start =  0x8c80, .end =  0x8dff },
3133         { .start =  0x94d0, .end =  0x955f },
3134         { .start =  0x9680, .end =  0x96ff },
3135         { .start =  0xb000, .end =  0xb3ff },
3136         { .start =  0xc800, .end =  0xcfff },
3137         { .start =  0xd800, .end =  0xd8ff },
3138         { .start =  0xdc00, .end =  0xffff },
3139         { .start = 0x17000, .end = 0x17fff },
3140         { .start = 0x24a00, .end = 0x24a7f },
3141         {},
3142 };
3143
3144 static bool mcr_range(struct drm_i915_private *i915, u32 offset)
3145 {
3146         const struct i915_range *mcr_ranges;
3147         int i;
3148
3149         if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50))
3150                 mcr_ranges = mcr_ranges_xehp;
3151         else if (GRAPHICS_VER(i915) >= 12)
3152                 mcr_ranges = mcr_ranges_gen12;
3153         else if (GRAPHICS_VER(i915) >= 8)
3154                 mcr_ranges = mcr_ranges_gen8;
3155         else
3156                 return false;
3157
3158         /*
3159          * Registers in these ranges are affected by the MCR selector
3160          * which only controls CPU initiated MMIO. Routing does not
3161          * work for CS access so we cannot verify them on this path.
3162          */
3163         for (i = 0; mcr_ranges[i].start; i++)
3164                 if (offset >= mcr_ranges[i].start &&
3165                     offset <= mcr_ranges[i].end)
3166                         return true;
3167
3168         return false;
3169 }
3170
3171 static int
3172 wa_list_srm(struct i915_request *rq,
3173             const struct i915_wa_list *wal,
3174             struct i915_vma *vma)
3175 {
3176         struct drm_i915_private *i915 = rq->engine->i915;
3177         unsigned int i, count = 0;
3178         const struct i915_wa *wa;
3179         u32 srm, *cs;
3180
3181         srm = MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT;
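             /*
              * On gen8+ MI_STORE_REGISTER_MEM takes a 64-bit destination
              * address, so bump the command's DWord length field by one to
              * cover the extra address dword.
              */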
3182         if (GRAPHICS_VER(i915) >= 8)
3183                 srm++;
3184
3185         for (i = 0, wa = wal->list; i < wal->count; i++, wa++) {
3186                 if (!mcr_range(i915, i915_mmio_reg_offset(wa->reg)))
3187                         count++;
3188         }
3189
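             /*
              * Reserve four dwords per entry: the gen8+ SRM is four dwords
              * long, while the pre-gen8 three-dword form is padded with a
              * trailing zero that decodes as MI_NOOP.
              */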
3190         cs = intel_ring_begin(rq, 4 * count);
3191         if (IS_ERR(cs))
3192                 return PTR_ERR(cs);
3193
3194         for (i = 0, wa = wal->list; i < wal->count; i++, wa++) {
3195                 u32 offset = i915_mmio_reg_offset(wa->reg);
3196
3197                 if (mcr_range(i915, offset))
3198                         continue;
3199
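                     /*
                      * Store each result at the entry's index in wal->list so
                      * that the verification loop can skip the same
                      * MCR-steered holes when reading the buffer back.
                      */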
3200                 *cs++ = srm;
3201                 *cs++ = offset;
3202                 *cs++ = i915_ggtt_offset(vma) + sizeof(u32) * i;
3203                 *cs++ = 0;
3204         }
3205         intel_ring_advance(rq, cs);
3206
3207         return 0;
3208 }
3209
3210 static int engine_wa_list_verify(struct intel_context *ce,
3211                                  const struct i915_wa_list * const wal,
3212                                  const char *from)
3213 {
3214         const struct i915_wa *wa;
3215         struct i915_request *rq;
3216         struct i915_vma *vma;
3217         struct i915_gem_ww_ctx ww;
3218         unsigned int i;
3219         u32 *results;
3220         int err;
3221
3222         if (!wal->count)
3223                 return 0;
3224
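             /*
              * Allocate one dword of GGTT-visible scratch per workaround
              * entry; the SRM batch below captures each register's current
              * value there for CPU readback.
              */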
3225         vma = __vm_create_scratch_for_read(&ce->engine->gt->ggtt->vm,
3226                                            wal->count * sizeof(u32));
3227         if (IS_ERR(vma))
3228                 return PTR_ERR(vma);
3229
3230         intel_engine_pm_get(ce->engine);
3231         i915_gem_ww_ctx_init(&ww, false);
3232 retry:
3233         err = i915_gem_object_lock(vma->obj, &ww);
3234         if (err == 0)
3235                 err = intel_context_pin_ww(ce, &ww);
3236         if (err)
3237                 goto err_pm;
3238
3239         err = i915_vma_pin_ww(vma, &ww, 0, 0,
3240                            i915_vma_is_ggtt(vma) ? PIN_GLOBAL : PIN_USER);
3241         if (err)
3242                 goto err_unpin;
3243
3244         rq = i915_request_create(ce);
3245         if (IS_ERR(rq)) {
3246                 err = PTR_ERR(rq);
3247                 goto err_vma;
3248         }
3249
3250         err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
3251         if (err == 0)
3252                 err = wa_list_srm(rq, wal, vma);
3253
3254         i915_request_get(rq);
3255         if (err)
3256                 i915_request_set_error_once(rq, err);
3257         i915_request_add(rq);
3258
3259         if (err)
3260                 goto err_rq;
3261
3262         if (i915_request_wait(rq, 0, HZ / 5) < 0) {
3263                 err = -ETIME;
3264                 goto err_rq;
3265         }
3266
3267         results = i915_gem_object_pin_map(vma->obj, I915_MAP_WB);
3268         if (IS_ERR(results)) {
3269                 err = PTR_ERR(results);
3270                 goto err_rq;
3271         }
3272
3273         err = 0;
3274         for (i = 0, wa = wal->list; i < wal->count; i++, wa++) {
3275                 if (mcr_range(rq->engine->i915, i915_mmio_reg_offset(wa->reg)))
3276                         continue;
3277
3278                 if (!wa_verify(wal->gt, wa, results[i], wal->name, from))
3279                         err = -ENXIO;
3280         }
3281
3282         i915_gem_object_unpin_map(vma->obj);
3283
3284 err_rq:
3285         i915_request_put(rq);
3286 err_vma:
3287         i915_vma_unpin(vma);
3288 err_unpin:
3289         intel_context_unpin(ce);
3290 err_pm:
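             /*
              * On ww-mutex contention (-EDEADLK), back off to drop the held
              * locks and retry the whole lock/pin sequence from the top.
              */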
3291         if (err == -EDEADLK) {
3292                 err = i915_gem_ww_ctx_backoff(&ww);
3293                 if (!err)
3294                         goto retry;
3295         }
3296         i915_gem_ww_ctx_fini(&ww);
3297         intel_engine_pm_put(ce->engine);
3298         i915_vma_put(vma);
3299         return err;
3300 }
3301
3302 int intel_engine_verify_workarounds(struct intel_engine_cs *engine,
3303                                     const char *from)
3304 {
3305         return engine_wa_list_verify(engine->kernel_context,
3306                                      &engine->wa_list,
3307                                      from);
3308 }
3309
3310 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
3311 #include "selftest_workarounds.c"
3312 #endif