/* SPDX-License-Identifier: MIT */
#ifndef _INTEL_RINGBUFFER_H_
#define _INTEL_RINGBUFFER_H_

#include <drm/drm_util.h>

#include <linux/hashtable.h>
#include <linux/seqlock.h>

#include "i915_gem_batch_pool.h"

#include "i915_reg.h"
#include "i915_pmu.h"
#include "i915_request.h"
#include "i915_selftest.h"
#include "i915_timeline.h"
#include "intel_gpu_commands.h"
#include "intel_workarounds.h"

struct drm_printer;
struct i915_sched_attr;

#define I915_CMD_HASH_ORDER 9

/* Early gen2 devices have a cacheline of just 32 bytes, using 64 is overkill,
 * but keeps the logic simple. Indeed, the whole purpose of this macro is just
 * to give some inclination as to some of the magic values used in the various
 * workarounds!
 */
#define CACHELINE_BYTES 64
#define CACHELINE_DWORDS (CACHELINE_BYTES / sizeof(u32))

struct intel_hw_status_page {
	struct i915_vma *vma;
	u32 *addr;
};

#define I915_READ_TAIL(engine) I915_READ(RING_TAIL((engine)->mmio_base))
#define I915_WRITE_TAIL(engine, val) I915_WRITE(RING_TAIL((engine)->mmio_base), val)

#define I915_READ_START(engine) I915_READ(RING_START((engine)->mmio_base))
#define I915_WRITE_START(engine, val) I915_WRITE(RING_START((engine)->mmio_base), val)

#define I915_READ_HEAD(engine) I915_READ(RING_HEAD((engine)->mmio_base))
#define I915_WRITE_HEAD(engine, val) I915_WRITE(RING_HEAD((engine)->mmio_base), val)

#define I915_READ_CTL(engine) I915_READ(RING_CTL((engine)->mmio_base))
#define I915_WRITE_CTL(engine, val) I915_WRITE(RING_CTL((engine)->mmio_base), val)

#define I915_READ_IMR(engine) I915_READ(RING_IMR((engine)->mmio_base))
#define I915_WRITE_IMR(engine, val) I915_WRITE(RING_IMR((engine)->mmio_base), val)

#define I915_READ_MODE(engine) I915_READ(RING_MI_MODE((engine)->mmio_base))
#define I915_WRITE_MODE(engine, val) I915_WRITE(RING_MI_MODE((engine)->mmio_base), val)

/* seqno size is actually only a uint32, but since we plan to use MI_FLUSH_DW to
 * do the writes, and that must have qw aligned offsets, simply pretend it's 8b.
 */
enum intel_engine_hangcheck_action {
	ENGINE_IDLE = 0,
	ENGINE_WAIT,
	ENGINE_ACTIVE_SEQNO,
	ENGINE_ACTIVE_HEAD,
	ENGINE_ACTIVE_SUBUNITS,
	ENGINE_WAIT_KICK,
	ENGINE_DEAD,
};

static inline const char *
hangcheck_action_to_str(const enum intel_engine_hangcheck_action a)
{
	switch (a) {
	case ENGINE_IDLE:
		return "idle";
	case ENGINE_WAIT:
		return "wait";
	case ENGINE_ACTIVE_SEQNO:
		return "active seqno";
	case ENGINE_ACTIVE_HEAD:
		return "active head";
	case ENGINE_ACTIVE_SUBUNITS:
		return "active subunits";
	case ENGINE_WAIT_KICK:
		return "wait kick";
	case ENGINE_DEAD:
		return "dead";
	}

	return "unknown";
}

#define I915_MAX_SLICES	3
#define I915_MAX_SUBSLICES 8

#define instdone_slice_mask(dev_priv__) \
	(IS_GEN(dev_priv__, 7) ? \
	 1 : RUNTIME_INFO(dev_priv__)->sseu.slice_mask)

#define instdone_subslice_mask(dev_priv__) \
	(IS_GEN(dev_priv__, 7) ? \
	 1 : RUNTIME_INFO(dev_priv__)->sseu.subslice_mask[0])

#define for_each_instdone_slice_subslice(dev_priv__, slice__, subslice__) \
	for ((slice__) = 0, (subslice__) = 0; \
	     (slice__) < I915_MAX_SLICES; \
	     (subslice__) = ((subslice__) + 1) < I915_MAX_SUBSLICES ? (subslice__) + 1 : 0, \
	       (slice__) += ((subslice__) == 0)) \
		for_each_if((BIT(slice__) & instdone_slice_mask(dev_priv__)) && \
			    (BIT(subslice__) & instdone_subslice_mask(dev_priv__)))

struct intel_instdone {
	u32 instdone;
	/* The following exist only in the RCS engine */
	u32 slice_common;
	u32 sampler[I915_MAX_SLICES][I915_MAX_SUBSLICES];
	u32 row[I915_MAX_SLICES][I915_MAX_SUBSLICES];
};

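/*
 * Illustrative sketch (not a definitive user of this API): dumping the
 * per-unit INSTDONE registers for every populated slice/subslice pair,
 * along the lines of what the error-capture code does. "engine" is
 * assumed to be supplied by the caller.
 *
 *	struct intel_instdone instdone;
 *	int slice, subslice;
 *
 *	intel_engine_get_instdone(engine, &instdone);
 *	for_each_instdone_slice_subslice(engine->i915, slice, subslice)
 *		pr_info("sampler[%d][%d]: 0x%08x\n", slice, subslice,
 *			instdone.sampler[slice][subslice]);
 */
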
struct intel_engine_hangcheck {
	u64 acthd;
	u32 seqno;
	unsigned long action_timestamp;
	struct intel_instdone instdone;
};

struct intel_ring {
	struct i915_vma *vma;
	void *vaddr;

	struct i915_timeline *timeline;
	struct list_head request_list;
	struct list_head active_link;

	u32 head;
	u32 tail;
	u32 emit;

	u32 space;
	u32 size;
	u32 effective_size;
};

struct i915_gem_context;
struct drm_i915_reg_table;

/*
 * we use a single page to load ctx workarounds so all of these
 * values are expressed in terms of dwords
 *
 * struct i915_wa_ctx_bb:
 *  offset: specifies batch starting position, also helpful in case
 *    we want to have multiple batches at different offsets based on
 *    some criteria. It is not a requirement at the moment but provides
 *    an option for future use.
 *  size: size of the batch in DWORDS
 */
struct i915_ctx_workarounds {
	struct i915_wa_ctx_bb {
		u32 offset;
		u32 size;
	} indirect_ctx, per_ctx;
	struct i915_vma *vma;
};

struct i915_request;

#define I915_MAX_VCS	4
#define I915_MAX_VECS	2

/*
 * Engine IDs definitions.
 * Keep instances of the same type engine together.
 */
enum intel_engine_id {
	RCS = 0,
	BCS,
	VCS,
	VCS2,
	VCS3,
	VCS4,
#define _VCS(n) (VCS + (n))
	VECS,
	VECS2
#define _VECS(n) (VECS + (n))
};

struct i915_priolist {
	struct list_head requests[I915_PRIORITY_COUNT];
	struct rb_node node;
	unsigned long used;
	int priority;
};

#define priolist_for_each_request(it, plist, idx) \
	for (idx = 0; idx < ARRAY_SIZE((plist)->requests); idx++) \
		list_for_each_entry(it, &(plist)->requests[idx], sched.link)

#define priolist_for_each_request_consume(it, n, plist, idx) \
	for (; (idx = ffs((plist)->used)); (plist)->used &= ~BIT(idx - 1)) \
		list_for_each_entry_safe(it, n, \
					 &(plist)->requests[idx - 1], \
					 sched.link)

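/*
 * Illustrative sketch (assumes the caller holds the lock guarding the
 * submission queue; "p" is a hypothetical struct i915_priolist *): the
 * consuming iterator walks requests highest-priority-first, clearing
 * each bit in ->used as the corresponding list drains:
 *
 *	struct i915_request *rq, *rn;
 *	int i;
 *
 *	priolist_for_each_request_consume(rq, rn, p, i) {
 *		list_del_init(&rq->sched.link);
 *		__i915_request_submit(rq);
 *	}
 */
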
struct st_preempt_hang {
	struct completion completion;
	bool inject_hang;
};

/**
 * struct intel_engine_execlists - execlist submission queue and port state
 *
 * The struct intel_engine_execlists represents the combined logical state of
 * the driver and the hardware state for execlist mode of submission.
 */
struct intel_engine_execlists {
	/**
	 * @tasklet: softirq tasklet for bottom handler
	 */
	struct tasklet_struct tasklet;

	/**
	 * @default_priolist: priority list for I915_PRIORITY_NORMAL
	 */
	struct i915_priolist default_priolist;

	/**
	 * @no_priolist: priority lists disabled
	 */
	bool no_priolist;

	/**
	 * @submit_reg: gen-specific execlist submission register
	 * set to the ExecList Submission Port (elsp) register pre-Gen11 and to
	 * the ExecList Submission Queue Contents register array for Gen11+
	 */
	u32 __iomem *submit_reg;

	/**
	 * @ctrl_reg: the enhanced execlists control register, used to load the
	 * submit queue on the HW and to request preemptions to idle
	 */
	u32 __iomem *ctrl_reg;

	/**
	 * @port: execlist port states
	 *
	 * For each hardware ELSP (ExecList Submission Port) we keep
	 * track of the last request and the number of times we submitted
	 * that port to hw. We then count the number of times the hw reports
	 * a context completion or preemption. As only one context can
	 * be active on hw, we limit resubmission of a context to port[0].
	 * This is called a "lite restore" of the context.
	 */
	struct execlist_port {
		/**
		 * @request_count: combined request and submission count
		 */
		struct i915_request *request_count;
#define EXECLIST_COUNT_BITS 2
#define port_request(p) ptr_mask_bits((p)->request_count, EXECLIST_COUNT_BITS)
#define port_count(p) ptr_unmask_bits((p)->request_count, EXECLIST_COUNT_BITS)
#define port_pack(rq, count) ptr_pack_bits(rq, count, EXECLIST_COUNT_BITS)
#define port_unpack(p, count) ptr_unpack_bits((p)->request_count, count, EXECLIST_COUNT_BITS)
#define port_set(p, packed) ((p)->request_count = (packed))
#define port_isset(p) ((p)->request_count)
#define port_index(p, execlists) ((p) - (execlists)->port)

		/**
		 * @context_id: context ID for port
		 */
		GEM_DEBUG_DECL(u32 context_id);

#define EXECLIST_MAX_PORTS 2
	} port[EXECLIST_MAX_PORTS];

	/**
	 * @active: is the HW active? We consider the HW as active after
	 * submitting any context for execution and until we have seen the
	 * last context completion event. After that, we do not expect any
	 * more events until we submit, and so can park the HW.
	 *
	 * As we have a small number of different sources from which we feed
	 * the HW, we track the state of each inside a single bitfield.
	 */
	unsigned int active;
#define EXECLISTS_ACTIVE_USER 0
#define EXECLISTS_ACTIVE_PREEMPT 1
#define EXECLISTS_ACTIVE_HWACK 2

	/**
	 * @port_mask: number of execlist ports - 1
	 */
	unsigned int port_mask;

	/**
	 * @queue_priority_hint: Highest pending priority.
	 *
	 * When we add requests into the queue, or adjust the priority of
	 * executing requests, we compute the maximum priority of those
	 * pending requests. We can then use this value to determine if
	 * we need to preempt the executing requests to service the queue.
	 * However, since we may have recorded the priority of an inflight
	 * request we wanted to preempt but that has since completed, at the
	 * time of dequeuing the priority hint may no longer match the
	 * highest available request priority.
	 */
	int queue_priority_hint;

	/**
	 * @queue: queue of requests, in priority lists
	 */
	struct rb_root_cached queue;

	/**
	 * @csb_write: control register for Context Switch buffer
	 *
	 * Note this register may be either mmio or HWSP shadow.
	 */
	u32 *csb_write;

	/**
	 * @csb_status: status array for Context Switch buffer
	 *
	 * Note these registers may be either mmio or HWSP shadow.
	 */
	u32 *csb_status;

	/**
	 * @preempt_complete_status: expected CSB upon completing preemption
	 */
	u32 preempt_complete_status;

	/**
	 * @csb_head: context status buffer head
	 */
	u8 csb_head;

	I915_SELFTEST_DECLARE(struct st_preempt_hang preempt_hang;)
};

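/*
 * Illustrative sketch: the low EXECLIST_COUNT_BITS of @request_count pack
 * a submission count into the request pointer, so a lite restore of the
 * context already in port[0] bumps the count instead of consuming the
 * second port ("execlists" assumed supplied by the caller):
 *
 *	struct i915_request *rq;
 *	unsigned int count;
 *
 *	rq = port_unpack(&execlists->port[0], &count);
 *	if (rq)
 *		port_set(&execlists->port[0], port_pack(rq, count + 1));
 */
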
#define INTEL_ENGINE_CS_MAX_NAME 8

struct intel_engine_cs {
	struct drm_i915_private *i915;
	char name[INTEL_ENGINE_CS_MAX_NAME];

	enum intel_engine_id id;
	unsigned int hw_id;
	unsigned int guc_id;

	u8 uabi_id;
	u8 uabi_class;

	u8 class;
	u8 instance;
	u32 context_size;
	u32 mmio_base;

	struct intel_ring *buffer;

	struct i915_timeline timeline;

	struct drm_i915_gem_object *default_state;
	void *pinned_default_state;

	/* Rather than have every client wait upon all user interrupts,
	 * with the herd waking after every interrupt and each doing the
	 * heavyweight seqno dance, we delegate the task (of being the
	 * bottom-half of the user interrupt) to the first client. After
	 * every interrupt, we wake up one client, who does the heavyweight
	 * coherent seqno read and either goes back to sleep (if incomplete),
	 * or wakes up all the completed clients in parallel, before then
	 * transferring the bottom-half status to the next client in the queue.
	 *
	 * Compared to walking the entire list of waiters in a single dedicated
	 * bottom-half, we reduce the latency of the first waiter by avoiding
	 * a context switch, but incur additional coherent seqno reads when
	 * following the chain of request breadcrumbs. Since it is most likely
	 * that we have a single client waiting on each seqno, then reducing
	 * the overhead of waking that client is much preferred.
	 */
	struct intel_breadcrumbs {
		spinlock_t irq_lock; /* protects irq_*; irqsafe */
		struct intel_wait *irq_wait; /* oldest waiter by retirement */

		spinlock_t rb_lock; /* protects the rb and wraps irq_lock */
		struct rb_root waiters; /* sorted by retirement, priority */
		struct list_head signals; /* sorted by retirement */
		struct task_struct *signaler; /* used for fence signalling */

		struct timer_list fake_irq; /* used after a missed interrupt */
		struct timer_list hangcheck; /* detect missed interrupts */

		unsigned int hangcheck_interrupts;
		unsigned int irq_enabled;
		unsigned int irq_count;

		bool irq_armed : 1;
	} breadcrumbs;

	struct {
		/**
		 * @enable: Bitmask of enable sample events on this engine.
		 *
		 * Bits correspond to sample event types, for instance
		 * I915_SAMPLE_QUEUED is bit 0 etc.
		 */
		u32 enable;
		/**
		 * @enable_count: Reference count for the enabled samplers.
		 *
		 * Index number corresponds to the bit number from @enable.
		 */
		unsigned int enable_count[I915_PMU_SAMPLE_BITS];
		/**
		 * @sample: Counter values for sampling events.
		 *
		 * Our internal timer stores the current counters in this field.
		 */
#define I915_ENGINE_SAMPLE_MAX (I915_SAMPLE_SEMA + 1)
		struct i915_pmu_sample sample[I915_ENGINE_SAMPLE_MAX];
	} pmu;

	/*
	 * A pool of objects to use as shadow copies of client batch buffers
	 * when the command parser is enabled. Prevents the client from
	 * modifying the batch contents after software parsing.
	 */
	struct i915_gem_batch_pool batch_pool;

	struct intel_hw_status_page status_page;
	struct i915_ctx_workarounds wa_ctx;
	struct i915_wa_list ctx_wa_list;
	struct i915_wa_list wa_list;
	struct i915_wa_list whitelist;

	u32 irq_keep_mask; /* always keep these interrupts */
	u32 irq_enable_mask; /* bitmask to enable ring interrupt */
	void (*irq_enable)(struct intel_engine_cs *engine);
	void (*irq_disable)(struct intel_engine_cs *engine);

	int (*init_hw)(struct intel_engine_cs *engine);

	struct {
		void (*prepare)(struct intel_engine_cs *engine);
		void (*reset)(struct intel_engine_cs *engine, bool stalled);
		void (*finish)(struct intel_engine_cs *engine);
	} reset;

	void (*park)(struct intel_engine_cs *engine);
	void (*unpark)(struct intel_engine_cs *engine);

	void (*set_default_submission)(struct intel_engine_cs *engine);

	struct intel_context *(*context_pin)(struct intel_engine_cs *engine,
					     struct i915_gem_context *ctx);

	int (*request_alloc)(struct i915_request *rq);
	int (*init_context)(struct i915_request *rq);

	int (*emit_flush)(struct i915_request *request, u32 mode);
#define EMIT_INVALIDATE	BIT(0)
#define EMIT_FLUSH	BIT(1)
#define EMIT_BARRIER	(EMIT_INVALIDATE | EMIT_FLUSH)
	int (*emit_bb_start)(struct i915_request *rq,
			     u64 offset, u32 length,
			     unsigned int dispatch_flags);
#define I915_DISPATCH_SECURE BIT(0)
#define I915_DISPATCH_PINNED BIT(1)
	int (*emit_init_breadcrumb)(struct i915_request *rq);
	u32 *(*emit_fini_breadcrumb)(struct i915_request *rq,
				     u32 *cs);
	unsigned int emit_fini_breadcrumb_dw;

	/* Pass the request to the hardware queue (e.g. directly into
	 * the legacy ringbuffer or to the end of an execlist).
	 *
	 * This is called from an atomic context with irqs disabled; must
	 * be irq safe.
	 */
	void (*submit_request)(struct i915_request *rq);

	/*
	 * Call when the priority on a request has changed and it and its
	 * dependencies may need rescheduling. Note the request itself may
	 * not be ready to run!
	 */
	void (*schedule)(struct i915_request *request,
			 const struct i915_sched_attr *attr);

	/*
	 * Cancel all requests on the hardware, or queued for execution.
	 * This should only cancel the ready requests that have been
	 * submitted to the engine (via the engine->submit_request callback).
	 * This is called when marking the device as wedged.
	 */
	void (*cancel_requests)(struct intel_engine_cs *engine);

	void (*cleanup)(struct intel_engine_cs *engine);

	struct intel_engine_execlists execlists;

	/* Contexts are pinned whilst they are active on the GPU. The last
	 * context executed remains active whilst the GPU is idle - the
	 * switch away and write to the context object only occurs on the
	 * next execution. Contexts are only unpinned on retirement of the
	 * following request ensuring that we can always write to the object
	 * on the context switch even after idling. Across suspend, we switch
	 * to the kernel context and trash it as the save may not happen
	 * before the hardware is powered down.
	 */
	struct intel_context *last_retired_context;

	/* status_notifier: list of callbacks for context-switch changes */
	struct atomic_notifier_head context_status_notifier;

	struct intel_engine_hangcheck hangcheck;

#define I915_ENGINE_NEEDS_CMD_PARSER BIT(0)
#define I915_ENGINE_SUPPORTS_STATS   BIT(1)
#define I915_ENGINE_HAS_PREEMPTION   BIT(2)
	unsigned int flags;

	/*
	 * Table of commands the command parser needs to know about
	 * for this engine.
	 */
	DECLARE_HASHTABLE(cmd_hash, I915_CMD_HASH_ORDER);

	/*
	 * Table of registers allowed in commands that read/write registers.
	 */
	const struct drm_i915_reg_table *reg_tables;
	int reg_table_count;

	/*
	 * Returns the bitmask for the length field of the specified command.
	 * Return 0 for an unrecognized/invalid command.
	 *
	 * If the command parser finds an entry for a command in the engine's
	 * cmd_tables, it gets the command's length based on the table entry.
	 * If not, it calls this function to determine the per-engine length
	 * field encoding for the command (i.e. different opcode ranges use
	 * certain bits to encode the command length in the header).
	 */
	u32 (*get_cmd_length_mask)(u32 cmd_header);

	struct {
		/**
		 * @lock: Lock protecting the below fields.
		 */
		seqlock_t lock;
		/**
		 * @enabled: Reference count indicating number of listeners.
		 */
		unsigned int enabled;
		/**
		 * @active: Number of contexts currently scheduled in.
		 */
		unsigned int active;
		/**
		 * @enabled_at: Timestamp when busy stats were enabled.
		 */
		ktime_t enabled_at;
		/**
		 * @start: Timestamp of the last idle to active transition.
		 *
		 * Idle is defined as active == 0, active is active > 0.
		 */
		ktime_t start;
		/**
		 * @total: Total time this engine was busy.
		 *
		 * Accumulated time not counting the most recent block in cases
		 * where engine is currently busy (active > 0).
		 */
		ktime_t total;
	} stats;
};

static inline bool
intel_engine_needs_cmd_parser(const struct intel_engine_cs *engine)
{
	return engine->flags & I915_ENGINE_NEEDS_CMD_PARSER;
}

static inline bool
intel_engine_supports_stats(const struct intel_engine_cs *engine)
{
	return engine->flags & I915_ENGINE_SUPPORTS_STATS;
}

static inline bool
intel_engine_has_preemption(const struct intel_engine_cs *engine)
{
	return engine->flags & I915_ENGINE_HAS_PREEMPTION;
}

static inline bool __execlists_need_preempt(int prio, int last)
{
	return prio > max(0, last);
}

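/*
 * Illustrative sketch of how a dequeue path might use the helper above;
 * "last_prio" and inject_preempt() are hypothetical stand-ins for the
 * dequeue path's local state and gen-specific preemption hook:
 *
 *	if (intel_engine_has_preemption(engine) &&
 *	    __execlists_need_preempt(execlists->queue_priority_hint,
 *				     last_prio))
 *		inject_preempt(engine);
 */
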
static inline void
execlists_set_active(struct intel_engine_execlists *execlists,
		     unsigned int bit)
{
	__set_bit(bit, (unsigned long *)&execlists->active);
}

static inline bool
execlists_set_active_once(struct intel_engine_execlists *execlists,
			  unsigned int bit)
{
	return !__test_and_set_bit(bit, (unsigned long *)&execlists->active);
}

static inline void
execlists_clear_active(struct intel_engine_execlists *execlists,
		       unsigned int bit)
{
	__clear_bit(bit, (unsigned long *)&execlists->active);
}

static inline void
execlists_clear_all_active(struct intel_engine_execlists *execlists)
{
	execlists->active = 0;
}

static inline bool
execlists_is_active(const struct intel_engine_execlists *execlists,
		    unsigned int bit)
{
	return test_bit(bit, (unsigned long *)&execlists->active);
}

void execlists_user_begin(struct intel_engine_execlists *execlists,
			  const struct execlist_port *port);
void execlists_user_end(struct intel_engine_execlists *execlists);

void
execlists_cancel_port_requests(struct intel_engine_execlists * const execlists);

void
execlists_unwind_incomplete_requests(struct intel_engine_execlists *execlists);

static inline unsigned int
execlists_num_ports(const struct intel_engine_execlists * const execlists)
{
	return execlists->port_mask + 1;
}

static inline struct execlist_port *
execlists_port_complete(struct intel_engine_execlists * const execlists,
			struct execlist_port * const port)
{
	const unsigned int m = execlists->port_mask;

	GEM_BUG_ON(port_index(port, execlists) != 0);
	GEM_BUG_ON(!execlists_is_active(execlists, EXECLISTS_ACTIVE_USER));

	memmove(port, port + 1, m * sizeof(struct execlist_port));
	memset(port + m, 0, sizeof(struct execlist_port));

	return port;
}

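/*
 * Illustrative sketch (hypothetical fragment of a CSB handler): on a
 * context-complete event, the head port is either resubmitted while its
 * lite-restore count is still outstanding, or shifted out of the array:
 *
 *	struct i915_request *rq;
 *	unsigned int count;
 *
 *	rq = port_unpack(port, &count);
 *	if (count == 1) {
 *		i915_request_put(rq);
 *		port = execlists_port_complete(execlists, port);
 *	} else {
 *		port_set(port, port_pack(rq, count - 1));
 *	}
 */
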
static inline unsigned int
intel_engine_flag(const struct intel_engine_cs *engine)
{
	return BIT(engine->id);
}

static inline u32
intel_read_status_page(const struct intel_engine_cs *engine, int reg)
{
	/* Ensure that the compiler doesn't optimize away the load. */
	return READ_ONCE(engine->status_page.addr[reg]);
}

static inline void
intel_write_status_page(struct intel_engine_cs *engine, int reg, u32 value)
{
	/* Writing into the status page should be done sparingly. Since
	 * we do so when we are uncertain of the device state, we take a bit
	 * of extra paranoia to try and ensure that the HWS takes the value
	 * we give and that it doesn't end up trapped inside the CPU!
	 */
	if (static_cpu_has(X86_FEATURE_CLFLUSH)) {
		mb();
		clflush(&engine->status_page.addr[reg]);
		engine->status_page.addr[reg] = value;
		clflush(&engine->status_page.addr[reg]);
		mb();
	} else {
		WRITE_ONCE(engine->status_page.addr[reg], value);
	}
}

/*
 * Reads a dword out of the status page, which is written to from the command
 * queue by automatic updates, MI_REPORT_HEAD, MI_STORE_DATA_INDEX, or
 * MI_STORE_DATA_IMM.
 *
 * The following dwords have a reserved meaning:
 * 0x00: ISR copy, updated when an ISR bit not set in the HWSTAM changes.
 * 0x04: ring 0 head pointer
 * 0x05: ring 1 head pointer (915-class)
 * 0x06: ring 2 head pointer (915-class)
 * 0x10-0x1b: Context status DWords (GM45)
 * 0x1f: Last written status offset. (GM45)
 * 0x20-0x2f: Reserved (Gen6+)
 *
 * The area from dword 0x30 to 0x3ff is available for driver usage.
 */
#define I915_GEM_HWS_INDEX		0x30
#define I915_GEM_HWS_INDEX_ADDR		(I915_GEM_HWS_INDEX * sizeof(u32))
#define I915_GEM_HWS_PREEMPT		0x32
#define I915_GEM_HWS_PREEMPT_ADDR	(I915_GEM_HWS_PREEMPT * sizeof(u32))
#define I915_GEM_HWS_SEQNO		0x40
#define I915_GEM_HWS_SEQNO_ADDR		(I915_GEM_HWS_SEQNO * sizeof(u32))
#define I915_GEM_HWS_SCRATCH		0x80
#define I915_GEM_HWS_SCRATCH_ADDR	(I915_GEM_HWS_SCRATCH * sizeof(u32))

#define I915_HWS_CSB_BUF0_INDEX		0x10
#define I915_HWS_CSB_WRITE_INDEX	0x1f
#define CNL_HWS_CSB_WRITE_INDEX		0x2f

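/*
 * Illustrative note: the *_ADDR variants convert a dword index into the
 * byte offset that a command-streamer write must target, e.g.
 * I915_GEM_HWS_SEQNO_ADDR is 0x40 * sizeof(u32) == byte 0x100 of the
 * HWSP. CPU-side reads use the dword index directly:
 *
 *	u32 seqno = intel_read_status_page(engine, I915_GEM_HWS_SEQNO);
 */
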
struct intel_ring *
intel_engine_create_ring(struct intel_engine_cs *engine,
			 struct i915_timeline *timeline,
			 int size);
int intel_ring_pin(struct intel_ring *ring);
void intel_ring_reset(struct intel_ring *ring, u32 tail);
unsigned int intel_ring_update_space(struct intel_ring *ring);
void intel_ring_unpin(struct intel_ring *ring);
void intel_ring_free(struct intel_ring *ring);

void intel_engine_stop(struct intel_engine_cs *engine);
void intel_engine_cleanup(struct intel_engine_cs *engine);

void intel_legacy_submission_resume(struct drm_i915_private *dev_priv);

int __must_check intel_ring_cacheline_align(struct i915_request *rq);

u32 __must_check *intel_ring_begin(struct i915_request *rq, unsigned int n);

static inline void intel_ring_advance(struct i915_request *rq, u32 *cs)
{
	/* Dummy function.
	 *
	 * This serves as a placeholder in the code so that the reader
	 * can compare against the preceding intel_ring_begin() and
	 * check that the number of dwords emitted matches the space
	 * reserved for the command packet (i.e. the value passed to
	 * intel_ring_begin()).
	 */
	GEM_BUG_ON((rq->ring->vaddr + rq->ring->emit) != cs);
}

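/*
 * Illustrative sketch of the begin/advance contract (caller context
 * assumed): reserve space, emit exactly that many dwords, then close the
 * packet so the GEM_BUG_ON above can verify the accounting:
 *
 *	u32 *cs;
 *
 *	cs = intel_ring_begin(rq, 2);
 *	if (IS_ERR(cs))
 *		return PTR_ERR(cs);
 *
 *	*cs++ = MI_NOOP;
 *	*cs++ = MI_NOOP;
 *	intel_ring_advance(rq, cs);
 */
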
static inline u32 intel_ring_wrap(const struct intel_ring *ring, u32 pos)
{
	return pos & (ring->size - 1);
}

static inline bool
intel_ring_offset_valid(const struct intel_ring *ring,
			unsigned int pos)
{
	if (pos & -ring->size) /* must be strictly within the ring */
		return false;

	if (!IS_ALIGNED(pos, 8)) /* must be qword aligned */
		return false;

	return true;
}

static inline u32 intel_ring_offset(const struct i915_request *rq, void *addr)
{
	/* Don't write ring->size (equivalent to 0) as that hangs some GPUs. */
	u32 offset = addr - rq->ring->vaddr;
	GEM_BUG_ON(offset > rq->ring->size);
	return intel_ring_wrap(rq->ring, offset);
}

static inline void
assert_ring_tail_valid(const struct intel_ring *ring, unsigned int tail)
{
	GEM_BUG_ON(!intel_ring_offset_valid(ring, tail));

	/*
	 * "Ring Buffer Use"
	 *	Gen2 BSpec "1. Programming Environment" / 1.4.4.6
	 *	Gen3 BSpec "1c Memory Interface Functions" / 2.3.4.5
	 *	Gen4+ BSpec "1c Memory Interface and Command Stream" / 5.3.4.5
	 * "If the Ring Buffer Head Pointer and the Tail Pointer are on the
	 * same cacheline, the Head Pointer must not be greater than the Tail
	 * Pointer."
	 *
	 * We use ring->head as the last known location of the actual RING_HEAD,
	 * it may have advanced but in the worst case it is equally the same
	 * as ring->head and so we should never program RING_TAIL to advance
	 * into the same cacheline as ring->head.
	 */
#define cacheline(a) round_down(a, CACHELINE_BYTES)
	GEM_BUG_ON(cacheline(tail) == cacheline(ring->head) &&
		   tail < ring->head);
#undef cacheline
}

static inline unsigned int
intel_ring_set_tail(struct intel_ring *ring, unsigned int tail)
{
	/* Whilst writes to the tail are strictly ordered, there is no
	 * serialisation between readers and the writers. The tail may be
	 * read by i915_request_retire() just as it is being updated
	 * by execlists, as although the breadcrumb is complete, the context
	 * switch hasn't been seen.
	 */
	assert_ring_tail_valid(ring, tail);
	ring->tail = tail;
	return tail;
}

void intel_engine_write_global_seqno(struct intel_engine_cs *engine, u32 seqno);

int intel_engine_setup_common(struct intel_engine_cs *engine);
int intel_engine_init_common(struct intel_engine_cs *engine);
void intel_engine_cleanup_common(struct intel_engine_cs *engine);

int intel_init_render_ring_buffer(struct intel_engine_cs *engine);
int intel_init_bsd_ring_buffer(struct intel_engine_cs *engine);
int intel_init_blt_ring_buffer(struct intel_engine_cs *engine);
int intel_init_vebox_ring_buffer(struct intel_engine_cs *engine);

int intel_engine_stop_cs(struct intel_engine_cs *engine);
void intel_engine_cancel_stop_cs(struct intel_engine_cs *engine);

void intel_engine_set_hwsp_writemask(struct intel_engine_cs *engine, u32 mask);

u64 intel_engine_get_active_head(const struct intel_engine_cs *engine);
u64 intel_engine_get_last_batch_head(const struct intel_engine_cs *engine);

static inline u32 intel_engine_last_submit(struct intel_engine_cs *engine)
{
	/*
	 * We are only peeking at the tail of the submit queue (and not the
	 * queue itself) in order to gain a hint as to the current active
	 * state of the engine. Callers are not expected to be taking
	 * engine->timeline->lock, nor are they expected to be concerned
	 * with serialising this hint with anything, so document it as
	 * a hint and nothing more.
	 */
	return READ_ONCE(engine->timeline.seqno);
}

static inline u32 intel_engine_get_seqno(struct intel_engine_cs *engine)
{
	return intel_read_status_page(engine, I915_GEM_HWS_INDEX);
}

static inline bool intel_engine_signaled(struct intel_engine_cs *engine,
					 u32 seqno)
{
	return i915_seqno_passed(intel_engine_get_seqno(engine), seqno);
}

static inline bool intel_engine_has_completed(struct intel_engine_cs *engine,
					      u32 seqno)
{
	GEM_BUG_ON(!seqno);
	return intel_engine_signaled(engine, seqno);
}

static inline bool intel_engine_has_started(struct intel_engine_cs *engine,
					    u32 seqno)
{
	GEM_BUG_ON(!seqno);
	return intel_engine_signaled(engine, seqno - 1);
}

void intel_engine_get_instdone(struct intel_engine_cs *engine,
			       struct intel_instdone *instdone);

/* intel_breadcrumbs.c -- user interrupt bottom-half for waiters */
int intel_engine_init_breadcrumbs(struct intel_engine_cs *engine);

static inline void intel_wait_init(struct intel_wait *wait)
{
	wait->tsk = current;
	wait->request = NULL;
}

static inline void intel_wait_init_for_seqno(struct intel_wait *wait, u32 seqno)
{
	wait->tsk = current;
	wait->seqno = seqno;
}

static inline bool intel_wait_has_seqno(const struct intel_wait *wait)
{
	return wait->seqno;
}

static inline bool
intel_wait_update_seqno(struct intel_wait *wait, u32 seqno)
{
	wait->seqno = seqno;
	return intel_wait_has_seqno(wait);
}

static inline bool
intel_wait_update_request(struct intel_wait *wait,
			  const struct i915_request *rq)
{
	return intel_wait_update_seqno(wait, i915_request_global_seqno(rq));
}

static inline bool
intel_wait_check_seqno(const struct intel_wait *wait, u32 seqno)
{
	return wait->seqno == seqno;
}

static inline bool
intel_wait_check_request(const struct intel_wait *wait,
			 const struct i915_request *rq)
{
	return intel_wait_check_seqno(wait, i915_request_global_seqno(rq));
}

static inline bool intel_wait_complete(const struct intel_wait *wait)
{
	return RB_EMPTY_NODE(&wait->node);
}

bool intel_engine_add_wait(struct intel_engine_cs *engine,
			   struct intel_wait *wait);
void intel_engine_remove_wait(struct intel_engine_cs *engine,
			      struct intel_wait *wait);
bool intel_engine_enable_signaling(struct i915_request *request, bool wakeup);
void intel_engine_cancel_signaling(struct i915_request *request);

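/*
 * Illustrative sketch (simplified; the real wait loop lives in
 * i915_request.c and also handles signals and timeouts): a waiter
 * registers itself, sleeps until its seqno passes, then removes itself
 * so the bottom-half can be handed to the next oldest waiter:
 *
 *	struct intel_wait wait;
 *
 *	intel_wait_init_for_seqno(&wait, seqno);
 *	intel_engine_add_wait(engine, &wait);
 *	for (;;) {
 *		set_current_state(TASK_UNINTERRUPTIBLE);
 *		if (intel_engine_signaled(engine, seqno))
 *			break;
 *		schedule();
 *	}
 *	__set_current_state(TASK_RUNNING);
 *	intel_engine_remove_wait(engine, &wait);
 */
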
static inline bool intel_engine_has_waiter(const struct intel_engine_cs *engine)
{
	return READ_ONCE(engine->breadcrumbs.irq_wait);
}

unsigned int intel_engine_wakeup(struct intel_engine_cs *engine);
#define ENGINE_WAKEUP_WAITER BIT(0)
#define ENGINE_WAKEUP_ASLEEP BIT(1)

void intel_engine_pin_breadcrumbs_irq(struct intel_engine_cs *engine);
void intel_engine_unpin_breadcrumbs_irq(struct intel_engine_cs *engine);

void __intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine);
void intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine);

void intel_engine_reset_breadcrumbs(struct intel_engine_cs *engine);
void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine);

static inline u32 *gen8_emit_pipe_control(u32 *batch, u32 flags, u32 offset)
{
	memset(batch, 0, 6 * sizeof(u32));

	batch[0] = GFX_OP_PIPE_CONTROL(6);
	batch[1] = flags;
	batch[2] = offset;

	return batch + 6;
}

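/*
 * Illustrative sketch (flags chosen for the example only): emitting a
 * six-dword PIPE_CONTROL into a workaround batch that stalls the CS and
 * flushes the render target cache:
 *
 *	batch = gen8_emit_pipe_control(batch,
 *				       PIPE_CONTROL_CS_STALL |
 *				       PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH,
 *				       0);
 */
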
static inline u32 *
gen8_emit_ggtt_write_rcs(u32 *cs, u32 value, u32 gtt_offset, u32 flags)
{
	/* We're using qword write, offset should be aligned to 8 bytes. */
	GEM_BUG_ON(!IS_ALIGNED(gtt_offset, 8));

	/* w/a: for post-sync ops following a GPGPU operation we
	 * need a prior CS_STALL, which is emitted by the flush
	 * following the batch.
	 */
	*cs++ = GFX_OP_PIPE_CONTROL(6);
	*cs++ = flags | PIPE_CONTROL_QW_WRITE | PIPE_CONTROL_GLOBAL_GTT_IVB;
	*cs++ = gtt_offset;
	*cs++ = 0;
	*cs++ = value;
	/* We're thrashing one dword of HWS. */
	*cs++ = 0;

	return cs;
}

static inline u32 *
gen8_emit_ggtt_write(u32 *cs, u32 value, u32 gtt_offset)
{
	/* w/a: bit 5 needs to be zero for MI_FLUSH_DW address. */
	GEM_BUG_ON(gtt_offset & (1 << 5));
	/* Offset should be aligned to 8 bytes for both (QW/DW) write types */
	GEM_BUG_ON(!IS_ALIGNED(gtt_offset, 8));

	*cs++ = (MI_FLUSH_DW + 1) | MI_FLUSH_DW_OP_STOREDW;
	*cs++ = gtt_offset | MI_FLUSH_DW_USE_GTT;
	*cs++ = 0;
	*cs++ = value;

	return cs;
}

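/*
 * Illustrative sketch ("hwsp_offset" is a hypothetical GGTT address of
 * the request's seqno slot): a fini-breadcrumb implementation on a
 * non-render engine could emit its seqno write as:
 *
 *	cs = gen8_emit_ggtt_write(cs, i915_request_global_seqno(rq),
 *				  hwsp_offset);
 */
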
static inline void intel_engine_reset(struct intel_engine_cs *engine,
				      bool stalled)
{
	if (engine->reset.reset)
		engine->reset.reset(engine, stalled);
}

void intel_engines_sanitize(struct drm_i915_private *i915, bool force);

bool intel_engine_is_idle(struct intel_engine_cs *engine);
bool intel_engines_are_idle(struct drm_i915_private *dev_priv);

bool intel_engine_has_kernel_context(const struct intel_engine_cs *engine);
void intel_engine_lost_context(struct intel_engine_cs *engine);

void intel_engines_park(struct drm_i915_private *i915);
void intel_engines_unpark(struct drm_i915_private *i915);

void intel_engines_reset_default_submission(struct drm_i915_private *i915);
unsigned int intel_engines_has_context_isolation(struct drm_i915_private *i915);

bool intel_engine_can_store_dword(struct intel_engine_cs *engine);

__printf(3, 4)
void intel_engine_dump(struct intel_engine_cs *engine,
		       struct drm_printer *m,
		       const char *header, ...);

struct intel_engine_cs *
intel_engine_lookup_user(struct drm_i915_private *i915, u8 class, u8 instance);

static inline void intel_engine_context_in(struct intel_engine_cs *engine)
{
	unsigned long flags;

	if (READ_ONCE(engine->stats.enabled) == 0)
		return;

	write_seqlock_irqsave(&engine->stats.lock, flags);

	if (engine->stats.enabled > 0) {
		if (engine->stats.active++ == 0)
			engine->stats.start = ktime_get();
		GEM_BUG_ON(engine->stats.active == 0);
	}

	write_sequnlock_irqrestore(&engine->stats.lock, flags);
}

static inline void intel_engine_context_out(struct intel_engine_cs *engine)
{
	unsigned long flags;

	if (READ_ONCE(engine->stats.enabled) == 0)
		return;

	write_seqlock_irqsave(&engine->stats.lock, flags);

	if (engine->stats.enabled > 0) {
		ktime_t last;

		if (engine->stats.active && --engine->stats.active == 0) {
			/*
			 * Decrement the active context count and in case GPU
			 * is now idle add up to the running total.
			 */
			last = ktime_sub(ktime_get(), engine->stats.start);

			engine->stats.total = ktime_add(engine->stats.total,
							last);
		} else if (engine->stats.active == 0) {
			/*
			 * After turning on engine stats, context out might be
			 * the first event in which case we account from the
			 * time stats gathering was turned on.
			 */
			last = ktime_sub(ktime_get(), engine->stats.enabled_at);

			engine->stats.total = ktime_add(engine->stats.total,
							last);
		}
	}

	write_sequnlock_irqrestore(&engine->stats.lock, flags);
}

int intel_enable_engine_stats(struct intel_engine_cs *engine);
void intel_disable_engine_stats(struct intel_engine_cs *engine);

ktime_t intel_engine_get_busy_time(struct intel_engine_cs *engine);

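/*
 * Illustrative sketch (PMU-style consumer, error handling elided): busy
 * time only accumulates between enable and disable, so a sampler brackets
 * its reads accordingly:
 *
 *	if (intel_enable_engine_stats(engine) == 0) {
 *		ktime_t busy = intel_engine_get_busy_time(engine);
 *
 *		...
 *		intel_disable_engine_stats(engine);
 *	}
 */
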
#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)

static inline bool inject_preempt_hang(struct intel_engine_execlists *execlists)
{
	if (!execlists->preempt_hang.inject_hang)
		return false;

	complete(&execlists->preempt_hang.completion);
	return true;
}

#else

static inline bool inject_preempt_hang(struct intel_engine_execlists *execlists)
{
	return false;
}

#endif

#endif /* _INTEL_RINGBUFFER_H_ */