// SPDX-License-Identifier: MIT
/*
 * Copyright © 2014 Intel Corporation
 */

#include "gen8_engine_cs.h"
#include "i915_drv.h"
#include "intel_engine_regs.h"
#include "intel_gpu_commands.h"
#include "intel_lrc.h"
#include "intel_ring.h"

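/*
 * Emit a PIPE_CONTROL based flush/invalidate for the gen8/gen9 render
 * engine. A sketch of the flow: accumulate PIPE_CONTROL flags from the
 * requested EMIT_FLUSH/EMIT_INVALIDATE mode, then bracket the main
 * PIPE_CONTROL with extra ones for the gen9 VF cache and KBL GAM hang
 * workarounds.
 */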
int gen8_emit_flush_rcs(struct i915_request *rq, u32 mode)
{
	bool vf_flush_wa = false, dc_flush_wa = false;
	u32 *cs, flags = 0;
	int len;

	flags |= PIPE_CONTROL_CS_STALL;

	if (mode & EMIT_FLUSH) {
		flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
		flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
		flags |= PIPE_CONTROL_DC_FLUSH_ENABLE;
		flags |= PIPE_CONTROL_FLUSH_ENABLE;
	}

	if (mode & EMIT_INVALIDATE) {
		flags |= PIPE_CONTROL_TLB_INVALIDATE;
		flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
		flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
		flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
		flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
		flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
		flags |= PIPE_CONTROL_QW_WRITE;
		flags |= PIPE_CONTROL_STORE_DATA_INDEX;

		/*
		 * On GEN9: before VF_CACHE_INVALIDATE we need to emit a NULL
		 * pipe control.
		 */
		if (GRAPHICS_VER(rq->engine->i915) == 9)
			vf_flush_wa = true;

		/* WaForGAMHang:kbl */
		if (IS_KBL_GRAPHICS_STEP(rq->engine->i915, 0, STEP_C0))
			dc_flush_wa = true;
	}

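	/*
	 * Dword accounting: a gen8 PIPE_CONTROL is 6 dwords. The VF flush
	 * w/a adds one extra PIPE_CONTROL before the main one; the DC flush
	 * w/a brackets it with one before and one after, hence 12.
	 */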
	len = 6;

	if (vf_flush_wa)
		len += 6;

	if (dc_flush_wa)
		len += 12;

	cs = intel_ring_begin(rq, len);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	if (vf_flush_wa)
		cs = gen8_emit_pipe_control(cs, 0, 0);

	if (dc_flush_wa)
		cs = gen8_emit_pipe_control(cs, PIPE_CONTROL_DC_FLUSH_ENABLE,
					    0);

	cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR);

	if (dc_flush_wa)
		cs = gen8_emit_pipe_control(cs, PIPE_CONTROL_CS_STALL, 0);

	intel_ring_advance(rq, cs);

	return 0;
}

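/*
 * Non-render engines flush via MI_FLUSH_DW rather than PIPE_CONTROL; the
 * post-sync dword write into the per-context HWSP scratch slot provides
 * the command barrier described below.
 */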
int gen8_emit_flush_xcs(struct i915_request *rq, u32 mode)
{
	u32 cmd, *cs;

	cs = intel_ring_begin(rq, 4);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

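	/*
	 * The "+ 1" bumps the MI_FLUSH_DW length field by one dword to
	 * cover the upper half of the 64-bit post-sync address written
	 * below.
	 */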
	cmd = MI_FLUSH_DW + 1;

	/*
	 * We always require a command barrier so that subsequent
	 * commands, such as breadcrumb interrupts, are strictly ordered
	 * wrt the contents of the write cache being flushed to memory
	 * (and thus being coherent from the CPU).
	 */
	cmd |= MI_FLUSH_DW_STORE_INDEX | MI_FLUSH_DW_OP_STOREDW;

	if (mode & EMIT_INVALIDATE) {
		cmd |= MI_INVALIDATE_TLB;
		if (rq->engine->class == VIDEO_DECODE_CLASS)
			cmd |= MI_INVALIDATE_BSD;
	}

	*cs++ = cmd;
	*cs++ = LRC_PPHWSP_SCRATCH_ADDR;
	*cs++ = 0; /* upper addr */
	*cs++ = 0; /* value */
	intel_ring_advance(rq, cs);

	return 0;
}

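/*
 * From gen11 the flush and the invalidation are emitted as two separate
 * PIPE_CONTROLs rather than one combined flag word.
 */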
int gen11_emit_flush_rcs(struct i915_request *rq, u32 mode)
{
	if (mode & EMIT_FLUSH) {
		u32 *cs;
		u32 flags = 0;

		flags |= PIPE_CONTROL_CS_STALL;

		flags |= PIPE_CONTROL_TILE_CACHE_FLUSH;
		flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
		flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
		flags |= PIPE_CONTROL_DC_FLUSH_ENABLE;
		flags |= PIPE_CONTROL_FLUSH_ENABLE;
		flags |= PIPE_CONTROL_QW_WRITE;
		flags |= PIPE_CONTROL_STORE_DATA_INDEX;

		cs = intel_ring_begin(rq, 6);
		if (IS_ERR(cs))
			return PTR_ERR(cs);

		cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR);
		intel_ring_advance(rq, cs);
	}

	if (mode & EMIT_INVALIDATE) {
		u32 *cs;
		u32 flags = 0;

		flags |= PIPE_CONTROL_CS_STALL;

		flags |= PIPE_CONTROL_COMMAND_CACHE_INVALIDATE;
		flags |= PIPE_CONTROL_TLB_INVALIDATE;
		flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
		flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
		flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
		flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
		flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
		flags |= PIPE_CONTROL_QW_WRITE;
		flags |= PIPE_CONTROL_STORE_DATA_INDEX;

		cs = intel_ring_begin(rq, 6);
		if (IS_ERR(cs))
			return PTR_ERR(cs);

		cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR);
		intel_ring_advance(rq, cs);
	}

	return 0;
}

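/*
 * On gen12 the MI_ARB_CHECK instruction doubles as the pre-parser control:
 * bit 0 carries the new pre-fetch-disable state and bit 8 is the masked
 * write-enable for it (a reading of the encoding used below).
 */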
static u32 preparser_disable(bool state)
{
	return MI_ARB_CHECK | 1 << 8 | state;
}

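/*
 * Invalidate an engine's AUX table cache by writing AUX_INV to its AUX_NV
 * register via MI_LOAD_REGISTER_IMM (hsdes: 1809175790). Only needed on
 * parts without flat CCS, where compression metadata is kept in AUX tables.
 */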
u32 *gen12_emit_aux_table_inv(u32 *cs, const i915_reg_t inv_reg)
{
	*cs++ = MI_LOAD_REGISTER_IMM(1) | MI_LRI_MMIO_REMAP_EN;
	*cs++ = i915_mmio_reg_offset(inv_reg);
	*cs++ = AUX_INV;
	*cs++ = MI_NOOP;

	return cs;
}

int gen12_emit_flush_rcs(struct i915_request *rq, u32 mode)
{
	struct intel_engine_cs *engine = rq->engine;

	if (mode & EMIT_FLUSH) {
		u32 flags = 0;
		u32 *cs;

		flags |= PIPE_CONTROL_TILE_CACHE_FLUSH;
		flags |= PIPE_CONTROL_FLUSH_L3;
		flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
		flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
		/* Wa_1409600907:tgl,adl-p */
		flags |= PIPE_CONTROL_DEPTH_STALL;
		flags |= PIPE_CONTROL_DC_FLUSH_ENABLE;
		flags |= PIPE_CONTROL_FLUSH_ENABLE;

		flags |= PIPE_CONTROL_STORE_DATA_INDEX;
		flags |= PIPE_CONTROL_QW_WRITE;

		flags |= PIPE_CONTROL_CS_STALL;

		if (engine->class == COMPUTE_CLASS)
			flags &= ~PIPE_CONTROL_3D_FLAGS;

		cs = intel_ring_begin(rq, 6);
		if (IS_ERR(cs))
			return PTR_ERR(cs);

		cs = gen12_emit_pipe_control(cs,
					     PIPE_CONTROL0_HDC_PIPELINE_FLUSH,
					     flags, LRC_PPHWSP_SCRATCH_ADDR);
		intel_ring_advance(rq, cs);
	}

	if (mode & EMIT_INVALIDATE) {
		u32 flags = 0;
		u32 *cs, count;

		flags |= PIPE_CONTROL_COMMAND_CACHE_INVALIDATE;
		flags |= PIPE_CONTROL_TLB_INVALIDATE;
		flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
		flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
		flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
		flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
		flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;

		flags |= PIPE_CONTROL_STORE_DATA_INDEX;
		flags |= PIPE_CONTROL_QW_WRITE;

		flags |= PIPE_CONTROL_CS_STALL;

		if (engine->class == COMPUTE_CLASS)
			flags &= ~PIPE_CONTROL_3D_FLAGS;

		if (!HAS_FLAT_CCS(rq->engine->i915))
			count = 8 + 4;
		else
			count = 8;

		cs = intel_ring_begin(rq, count);
		if (IS_ERR(cs))
			return PTR_ERR(cs);

		/*
		 * Prevent the pre-parser from skipping past the TLB
		 * invalidate and loading a stale page for the batch
		 * buffer / request payload.
		 */
		*cs++ = preparser_disable(true);

		cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR);

		if (!HAS_FLAT_CCS(rq->engine->i915)) {
			/* hsdes: 1809175790 */
			cs = gen12_emit_aux_table_inv(cs, GEN12_GFX_CCS_AUX_NV);
		}

		*cs++ = preparser_disable(false);
		intel_ring_advance(rq, cs);
	}

	return 0;
}

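/*
 * On the media engines the AUX table invalidation rides along with the
 * MI_FLUSH_DW: if the platform lacks flat CCS, the VD/VE invalidation
 * path also emits an LRI to the engine's AUX_NV register.
 */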
int gen12_emit_flush_xcs(struct i915_request *rq, u32 mode)
{
	intel_engine_mask_t aux_inv = 0;
	u32 cmd, *cs;

	cmd = 4;
	if (mode & EMIT_INVALIDATE) {
		cmd += 2;

		if (!HAS_FLAT_CCS(rq->engine->i915) &&
		    (rq->engine->class == VIDEO_DECODE_CLASS ||
		     rq->engine->class == VIDEO_ENHANCEMENT_CLASS)) {
			aux_inv = rq->engine->mask & ~BIT(BCS0);
			if (aux_inv)
				cmd += 4;
		}
	}

	cs = intel_ring_begin(rq, cmd);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	if (mode & EMIT_INVALIDATE)
		*cs++ = preparser_disable(true);

	cmd = MI_FLUSH_DW + 1;

	/*
	 * We always require a command barrier so that subsequent
	 * commands, such as breadcrumb interrupts, are strictly ordered
	 * wrt the contents of the write cache being flushed to memory
	 * (and thus being coherent from the CPU).
	 */
	cmd |= MI_FLUSH_DW_STORE_INDEX | MI_FLUSH_DW_OP_STOREDW;

	if (mode & EMIT_INVALIDATE) {
		cmd |= MI_INVALIDATE_TLB;
		if (rq->engine->class == VIDEO_DECODE_CLASS)
			cmd |= MI_INVALIDATE_BSD;
	}

	*cs++ = cmd;
	*cs++ = LRC_PPHWSP_SCRATCH_ADDR;
	*cs++ = 0; /* upper addr */
	*cs++ = 0; /* value */

	if (aux_inv) { /* hsdes: 1809175790 */
		if (rq->engine->class == VIDEO_DECODE_CLASS)
			cs = gen12_emit_aux_table_inv(cs, GEN12_VD0_AUX_NV);
		else
			cs = gen12_emit_aux_table_inv(cs, GEN12_VE0_AUX_NV);
	}

	if (mode & EMIT_INVALIDATE)
		*cs++ = preparser_disable(false);

	intel_ring_advance(rq, cs);

	return 0;
}

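/* GGTT address of the preemption semaphore in the engine's status page. */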
static u32 preempt_address(struct intel_engine_cs *engine)
{
	return (i915_ggtt_offset(engine->status_page.vma) +
		I915_GEM_HWS_PREEMPT_ADDR);
}

static u32 hwsp_offset(const struct i915_request *rq)
{
	const struct intel_timeline *tl;

	/* Before the request is executed, the timeline is fixed */
	tl = rcu_dereference_protected(rq->timeline,
				       !i915_request_signaled(rq));

	/* See the comment in i915_request_active_seqno(). */
	return page_mask_bits(tl->hwsp_offset) + offset_in_page(rq->hwsp_seqno);
}

int gen8_emit_init_breadcrumb(struct i915_request *rq)
{
	u32 *cs;

	GEM_BUG_ON(i915_request_has_initial_breadcrumb(rq));
	if (!i915_request_timeline(rq)->has_initial_breadcrumb)
		return 0;

	cs = intel_ring_begin(rq, 6);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
	*cs++ = hwsp_offset(rq);
	*cs++ = 0;
	*cs++ = rq->fence.seqno - 1;

	/*
	 * Check if we have been preempted before we even get started.
	 *
	 * After this point i915_request_started() reports true, even if
	 * we get preempted and so are no longer running.
	 *
	 * i915_request_started() is used during preemption processing
	 * to decide if the request is currently inside the user payload
	 * or spinning on a kernel semaphore (or earlier). For no-preemption
	 * requests, we do allow preemption on the semaphore before the user
	 * payload, but do not allow preemption once the request is started.
	 *
	 * i915_request_started() is similarly used during GPU hangs to
	 * determine if the user's payload was guilty, and if so, the
	 * request is banned. Before the request is started, it is assumed
	 * to be unharmed and an innocent victim of another's hang.
	 */
	*cs++ = MI_NOOP;
	*cs++ = MI_ARB_CHECK;

	intel_ring_advance(rq, cs);

	/* Record the updated position of the request's payload */
	rq->infix = intel_ring_offset(rq, cs);

	__set_bit(I915_FENCE_FLAG_INITIAL_BREADCRUMB, &rq->fence.flags);

	return 0;
}

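/*
 * Gen12.50+ batch-buffer start. A sketch of the flow: reload
 * RING_PREDICATE_RESULT from a scratch slot in the context's indirect w/a
 * batch area, jump into the user batch, and on return bounce through a
 * small w/a batch that neutralizes any stray MI_SET_PREDICATE the user
 * batch may have left behind.
 */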
static int __gen125_emit_bb_start(struct i915_request *rq,
				  u64 offset, u32 len,
				  const unsigned int flags,
				  u32 arb)
{
	struct intel_context *ce = rq->context;
	u32 wa_offset = lrc_indirect_bb(ce);
	u32 *cs;

	cs = intel_ring_begin(rq, 12);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	*cs++ = MI_ARB_ON_OFF | arb;

	*cs++ = MI_LOAD_REGISTER_MEM_GEN8 |
		MI_SRM_LRM_GLOBAL_GTT |
		MI_LRI_LRM_CS_MMIO;
	*cs++ = i915_mmio_reg_offset(RING_PREDICATE_RESULT(0));
	*cs++ = wa_offset + DG2_PREDICATE_RESULT_WA;
	*cs++ = 0;

	*cs++ = MI_BATCH_BUFFER_START_GEN8 |
		(flags & I915_DISPATCH_SECURE ? 0 : BIT(8));
	*cs++ = lower_32_bits(offset);
	*cs++ = upper_32_bits(offset);

	/* Fixup stray MI_SET_PREDICATE as it prevents us executing the ring */
	*cs++ = MI_BATCH_BUFFER_START_GEN8;
	*cs++ = wa_offset + DG2_PREDICATE_RESULT_BB;
	*cs++ = 0;

	*cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;

	intel_ring_advance(rq, cs);

	return 0;
}

int gen125_emit_bb_start_noarb(struct i915_request *rq,
			       u64 offset, u32 len,
			       const unsigned int flags)
{
	return __gen125_emit_bb_start(rq, offset, len, flags, MI_ARB_DISABLE);
}

int gen125_emit_bb_start(struct i915_request *rq,
			 u64 offset, u32 len,
			 const unsigned int flags)
{
	return __gen125_emit_bb_start(rq, offset, len, flags, MI_ARB_ENABLE);
}

int gen8_emit_bb_start_noarb(struct i915_request *rq,
			     u64 offset, u32 len,
			     const unsigned int flags)
{
	u32 *cs;

	cs = intel_ring_begin(rq, 4);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	/*
	 * WaDisableCtxRestoreArbitration:bdw,chv
	 *
	 * We don't need to perform MI_ARB_ENABLE as often as we do (in
	 * particular all the gen that do not need the w/a at all!), if we
	 * took care to make sure that on every switch into this context
	 * (both ordinary and for preemption) arbitration was enabled
	 * we would be fine. However, for gen8 there is another w/a that
	 * requires us to not preempt inside GPGPU execution, so we keep
	 * arbitration disabled for gen8 batches. Arbitration will be
	 * re-enabled before we close the request
	 * (engine->emit_fini_breadcrumb).
	 */
	*cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;

	/* FIXME(BDW+): Address space and security selectors. */
	*cs++ = MI_BATCH_BUFFER_START_GEN8 |
		(flags & I915_DISPATCH_SECURE ? 0 : BIT(8));
	*cs++ = lower_32_bits(offset);
	*cs++ = upper_32_bits(offset);

	intel_ring_advance(rq, cs);

	return 0;
}

int gen8_emit_bb_start(struct i915_request *rq,
		       u64 offset, u32 len,
		       const unsigned int flags)
{
	u32 *cs;

	if (unlikely(i915_request_has_nopreempt(rq)))
		return gen8_emit_bb_start_noarb(rq, offset, len, flags);

	cs = intel_ring_begin(rq, 6);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;

	*cs++ = MI_BATCH_BUFFER_START_GEN8 |
		(flags & I915_DISPATCH_SECURE ? 0 : BIT(8));
	*cs++ = lower_32_bits(offset);
	*cs++ = upper_32_bits(offset);

	*cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
	*cs++ = MI_NOOP;

	intel_ring_advance(rq, cs);

	return 0;
}

static void assert_request_valid(struct i915_request *rq)
{
	struct intel_ring *ring __maybe_unused = rq->ring;

	/* Can we unwind this request without appearing to go forwards? */
	GEM_BUG_ON(intel_ring_direction(ring, rq->wa_tail, rq->head) <= 0);
}

/*
 * Reserve space for 2 NOOPs at the end of each request to be
 * used as a workaround for not being allowed to do lite
 * restore with HEAD==TAIL (WaIdleLiteRestore).
 */
static u32 *gen8_emit_wa_tail(struct i915_request *rq, u32 *cs)
{
	/* Ensure there's always at least one preemption point per-request. */
	*cs++ = MI_ARB_CHECK;
	*cs++ = MI_NOOP;
	rq->wa_tail = intel_ring_offset(rq, cs);

	/* Check that entire request is less than half the ring */
	assert_request_valid(rq);

	return cs;
}

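/*
 * Busy-wait on the preemption semaphore in the status page: SAD_EQ_SDD
 * polls until the dword at preempt_address() reads back as 0, i.e. until
 * the scheduler releases the request past its preemption point.
 */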
static u32 *emit_preempt_busywait(struct i915_request *rq, u32 *cs)
{
	*cs++ = MI_ARB_CHECK; /* trigger IDLE->ACTIVE first */
	*cs++ = MI_SEMAPHORE_WAIT |
		MI_SEMAPHORE_GLOBAL_GTT |
		MI_SEMAPHORE_POLL |
		MI_SEMAPHORE_SAD_EQ_SDD;
	*cs++ = 0;
	*cs++ = preempt_address(rq->engine);
	*cs++ = 0;
	*cs++ = MI_NOOP;

	return cs;
}

static __always_inline u32*
gen8_emit_fini_breadcrumb_tail(struct i915_request *rq, u32 *cs)
{
	*cs++ = MI_USER_INTERRUPT;

	*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
	if (intel_engine_has_semaphores(rq->engine) &&
	    !intel_uc_uses_guc_submission(&rq->engine->gt->uc))
		cs = emit_preempt_busywait(rq, cs);

	rq->tail = intel_ring_offset(rq, cs);
	assert_ring_tail_valid(rq->ring, rq->tail);

	return gen8_emit_wa_tail(rq, cs);
}

static u32 *emit_xcs_breadcrumb(struct i915_request *rq, u32 *cs)
{
	return gen8_emit_ggtt_write(cs, rq->fence.seqno, hwsp_offset(rq), 0);
}

u32 *gen8_emit_fini_breadcrumb_xcs(struct i915_request *rq, u32 *cs)
{
	return gen8_emit_fini_breadcrumb_tail(rq, emit_xcs_breadcrumb(rq, cs));
}

u32 *gen8_emit_fini_breadcrumb_rcs(struct i915_request *rq, u32 *cs)
{
	cs = gen8_emit_pipe_control(cs,
				    PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
				    PIPE_CONTROL_DEPTH_CACHE_FLUSH |
				    PIPE_CONTROL_DC_FLUSH_ENABLE,
				    0);

	/* XXX flush+write+CS_STALL all in one upsets gem_concurrent_blt:kbl */
	cs = gen8_emit_ggtt_write_rcs(cs,
				      rq->fence.seqno,
				      hwsp_offset(rq),
				      PIPE_CONTROL_FLUSH_ENABLE |
				      PIPE_CONTROL_CS_STALL);

	return gen8_emit_fini_breadcrumb_tail(rq, cs);
}

u32 *gen11_emit_fini_breadcrumb_rcs(struct i915_request *rq, u32 *cs)
{
	cs = gen8_emit_ggtt_write_rcs(cs,
				      rq->fence.seqno,
				      hwsp_offset(rq),
				      PIPE_CONTROL_CS_STALL |
				      PIPE_CONTROL_TILE_CACHE_FLUSH |
				      PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
				      PIPE_CONTROL_DEPTH_CACHE_FLUSH |
				      PIPE_CONTROL_DC_FLUSH_ENABLE |
				      PIPE_CONTROL_FLUSH_ENABLE);

	return gen8_emit_fini_breadcrumb_tail(rq, cs);
}

/*
 * Note that the CS instruction pre-parser will not stall on the breadcrumb
 * flush and will continue pre-fetching the instructions after it before the
 * memory sync is completed. On pre-gen12 HW, the pre-parser will stop at
 * BB_START/END instructions, so, even though we might pre-fetch the pre-amble
 * of the next request before the memory has been flushed, we're guaranteed that
 * we won't access the batch itself too early.
 * However, on gen12+ the parser can pre-fetch across the BB_START/END commands,
 * so, if the current request is modifying an instruction in the next request on
 * the same intel_context, we might pre-fetch and then execute the pre-update
 * instruction. To avoid this, the users of self-modifying code should either
 * disable the parser around the code emitting the memory writes, via a new flag
 * added to MI_ARB_CHECK, or emit the writes from a different intel_context. For
 * the in-kernel use-cases we've opted to use a separate context, see
 * reloc_gpu() as an example.
 * All the above applies only to the instructions themselves. Non-inline data
 * used by the instructions is not pre-fetched.
 */

static u32 *gen12_emit_preempt_busywait(struct i915_request *rq, u32 *cs)
{
	*cs++ = MI_ARB_CHECK; /* trigger IDLE->ACTIVE first */
	*cs++ = MI_SEMAPHORE_WAIT_TOKEN |
		MI_SEMAPHORE_GLOBAL_GTT |
		MI_SEMAPHORE_POLL |
		MI_SEMAPHORE_SAD_EQ_SDD;
	*cs++ = 0;
	*cs++ = preempt_address(rq->engine);
	*cs++ = 0;
	*cs++ = 0;

	return cs;
}

/* Wa_14014475959:dg2 */
#define CCS_SEMAPHORE_PPHWSP_OFFSET	0x540
static u32 ccs_semaphore_offset(struct i915_request *rq)
{
	return i915_ggtt_offset(rq->context->state) +
		(LRC_PPHWSP_PN * PAGE_SIZE) + CCS_SEMAPHORE_PPHWSP_OFFSET;
}

/* Wa_14014475959:dg2 */
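/*
 * A sketch of the hold-CCS-switchout flow: atomically move 1 into a
 * scratch semaphore in the context's PPHWSP, then MI_SEMAPHORE_WAIT until
 * it reads back as 0 again, which (by this reading of the w/a) is done by
 * an external agent once it is safe to switch the CCS engine out.
 */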
static u32 *ccs_emit_wa_busywait(struct i915_request *rq, u32 *cs)
{
	int i;

	*cs++ = MI_ATOMIC_INLINE | MI_ATOMIC_GLOBAL_GTT | MI_ATOMIC_CS_STALL |
		MI_ATOMIC_MOVE;
	*cs++ = ccs_semaphore_offset(rq);
	*cs++ = 0;
	*cs++ = 1;

	/*
	 * When MI_ATOMIC_INLINE_DATA set this command must be 11 DW + (1 NOP)
	 * to align. 4 DWs above + 8 filler DWs here.
	 */
	for (i = 0; i < 8; ++i)
		*cs++ = 0;

	*cs++ = MI_SEMAPHORE_WAIT |
		MI_SEMAPHORE_GLOBAL_GTT |
		MI_SEMAPHORE_POLL |
		MI_SEMAPHORE_SAD_EQ_SDD;
	*cs++ = 0;
	*cs++ = ccs_semaphore_offset(rq);
	*cs++ = 0;

	return cs;
}

static __always_inline u32*
gen12_emit_fini_breadcrumb_tail(struct i915_request *rq, u32 *cs)
{
	*cs++ = MI_USER_INTERRUPT;

	*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
	if (intel_engine_has_semaphores(rq->engine) &&
	    !intel_uc_uses_guc_submission(&rq->engine->gt->uc))
		cs = gen12_emit_preempt_busywait(rq, cs);

	/* Wa_14014475959:dg2 */
	if (intel_engine_uses_wa_hold_ccs_switchout(rq->engine))
		cs = ccs_emit_wa_busywait(rq, cs);

	rq->tail = intel_ring_offset(rq, cs);
	assert_ring_tail_valid(rq->ring, rq->tail);

	return gen8_emit_wa_tail(rq, cs);
}

u32 *gen12_emit_fini_breadcrumb_xcs(struct i915_request *rq, u32 *cs)
{
	/* XXX Stalling flush before seqno write; post-sync not */
	cs = emit_xcs_breadcrumb(rq, __gen8_emit_flush_dw(cs, 0, 0, 0));
	return gen12_emit_fini_breadcrumb_tail(rq, cs);
}

u32 *gen12_emit_fini_breadcrumb_rcs(struct i915_request *rq, u32 *cs)
{
	struct drm_i915_private *i915 = rq->engine->i915;
	u32 flags = (PIPE_CONTROL_CS_STALL |
		     PIPE_CONTROL_TILE_CACHE_FLUSH |
		     PIPE_CONTROL_FLUSH_L3 |
		     PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
		     PIPE_CONTROL_DEPTH_CACHE_FLUSH |
		     PIPE_CONTROL_DC_FLUSH_ENABLE |
		     PIPE_CONTROL_FLUSH_ENABLE);

	if (GRAPHICS_VER(i915) == 12 && GRAPHICS_VER_FULL(i915) < IP_VER(12, 50))
		/* Wa_1409600907 */
		flags |= PIPE_CONTROL_DEPTH_STALL;

	if (rq->engine->class == COMPUTE_CLASS)
		flags &= ~PIPE_CONTROL_3D_FLAGS;

	cs = gen12_emit_ggtt_write_rcs(cs,
				       rq->fence.seqno,
				       hwsp_offset(rq),
				       PIPE_CONTROL0_HDC_PIPELINE_FLUSH,
				       flags);

	return gen12_emit_fini_breadcrumb_tail(rq, cs);
}