/* SPDX-License-Identifier: MIT */
#ifndef _INTEL_RINGBUFFER_H_
#define _INTEL_RINGBUFFER_H_

#include <drm/drm_util.h>

#include <linux/hashtable.h>
#include <linux/seqlock.h>

#include "i915_gem_batch_pool.h"

#include "i915_reg.h"
#include "i915_pmu.h"
#include "i915_request.h"
#include "i915_selftest.h"
#include "i915_timeline.h"
#include "intel_gpu_commands.h"
#include "intel_workarounds.h"

struct drm_printer;
struct i915_sched_attr;

#define I915_CMD_HASH_ORDER 9

/* Early gen2 devices have a cacheline of just 32 bytes, using 64 is overkill,
 * but keeps the logic simple. Indeed, the whole purpose of this macro is just
 * to give some inclination as to some of the magic values used in the various
 * workarounds!
 */
#define CACHELINE_BYTES 64
#define CACHELINE_DWORDS (CACHELINE_BYTES / sizeof(u32))

struct intel_hw_status_page {
	struct i915_vma *vma;
	u32 *addr;
};

#define I915_READ_TAIL(engine) I915_READ(RING_TAIL((engine)->mmio_base))
#define I915_WRITE_TAIL(engine, val) I915_WRITE(RING_TAIL((engine)->mmio_base), val)

#define I915_READ_START(engine) I915_READ(RING_START((engine)->mmio_base))
#define I915_WRITE_START(engine, val) I915_WRITE(RING_START((engine)->mmio_base), val)

#define I915_READ_HEAD(engine) I915_READ(RING_HEAD((engine)->mmio_base))
#define I915_WRITE_HEAD(engine, val) I915_WRITE(RING_HEAD((engine)->mmio_base), val)

#define I915_READ_CTL(engine) I915_READ(RING_CTL((engine)->mmio_base))
#define I915_WRITE_CTL(engine, val) I915_WRITE(RING_CTL((engine)->mmio_base), val)

#define I915_READ_IMR(engine) I915_READ(RING_IMR((engine)->mmio_base))
#define I915_WRITE_IMR(engine, val) I915_WRITE(RING_IMR((engine)->mmio_base), val)

#define I915_READ_MODE(engine) I915_READ(RING_MI_MODE((engine)->mmio_base))
#define I915_WRITE_MODE(engine, val) I915_WRITE(RING_MI_MODE((engine)->mmio_base), val)

/* seqno size is actually only a uint32, but since we plan to use MI_FLUSH_DW to
 * do the writes, and that must have qw aligned offsets, simply pretend it's 8b.
 */
enum intel_engine_hangcheck_action {
	ENGINE_IDLE = 0,
	ENGINE_WAIT,
	ENGINE_ACTIVE_SEQNO,
	ENGINE_ACTIVE_HEAD,
	ENGINE_ACTIVE_SUBUNITS,
	ENGINE_WAIT_KICK,
	ENGINE_DEAD,
};

static inline const char *
hangcheck_action_to_str(const enum intel_engine_hangcheck_action a)
{
	switch (a) {
	case ENGINE_IDLE:
		return "idle";
	case ENGINE_WAIT:
		return "wait";
	case ENGINE_ACTIVE_SEQNO:
		return "active seqno";
	case ENGINE_ACTIVE_HEAD:
		return "active head";
	case ENGINE_ACTIVE_SUBUNITS:
		return "active subunits";
	case ENGINE_WAIT_KICK:
		return "wait kick";
	case ENGINE_DEAD:
		return "dead";
	}

	return "unknown";
}

#define I915_MAX_SLICES 3
#define I915_MAX_SUBSLICES 8

#define instdone_slice_mask(dev_priv__) \
	(IS_GEN(dev_priv__, 7) ? \
	 1 : RUNTIME_INFO(dev_priv__)->sseu.slice_mask)

#define instdone_subslice_mask(dev_priv__) \
	(IS_GEN(dev_priv__, 7) ? \
	 1 : RUNTIME_INFO(dev_priv__)->sseu.subslice_mask[0])

#define for_each_instdone_slice_subslice(dev_priv__, slice__, subslice__) \
	for ((slice__) = 0, (subslice__) = 0; \
	     (slice__) < I915_MAX_SLICES; \
	     (subslice__) = ((subslice__) + 1) < I915_MAX_SUBSLICES ? (subslice__) + 1 : 0, \
	       (slice__) += ((subslice__) == 0)) \
		for_each_if((BIT(slice__) & instdone_slice_mask(dev_priv__)) && \
			    (BIT(subslice__) & instdone_subslice_mask(dev_priv__)))

struct intel_instdone {
	u32 instdone;
	/* The following exist only in the RCS engine */
	u32 slice_common;
	u32 sampler[I915_MAX_SLICES][I915_MAX_SUBSLICES];
	u32 row[I915_MAX_SLICES][I915_MAX_SUBSLICES];
};

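/*
 * Illustrative sketch (hypothetical helper, not part of the driver API):
 * walking the per-slice/subslice samples collected in struct intel_instdone.
 * Real users iterate with for_each_instdone_slice_subslice(), which also
 * masks off fused-off units via instdone_slice_mask()/instdone_subslice_mask();
 * those macros need the full drm_i915_private definition, so a plain loop
 * over the array bounds is shown here instead.
 */
static inline u32
example_sum_sampler_instdone(const struct intel_instdone *instdone)
{
	u32 sum = 0;
	int slice, subslice;

	for (slice = 0; slice < I915_MAX_SLICES; slice++)
		for (subslice = 0; subslice < I915_MAX_SUBSLICES; subslice++)
			sum += instdone->sampler[slice][subslice];

	return sum;
}
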
struct intel_engine_hangcheck {
	u64 acthd;
	u32 seqno;
	unsigned long action_timestamp;
	struct intel_instdone instdone;
};

struct intel_ring {
	struct i915_vma *vma;
	void *vaddr;

	struct i915_timeline *timeline;
	struct list_head request_list;
	struct list_head active_link;

	u32 head;
	u32 tail;
	u32 emit;

	u32 space;
	u32 size;
	u32 effective_size;
};

struct i915_gem_context;
struct drm_i915_reg_table;

/*
 * we use a single page to load ctx workarounds so all of these
 * values are referred to in terms of dwords
 *
 * struct i915_wa_ctx_bb:
 *  offset: specifies batch starting position, also helpful in case
 *    we want to have multiple batches at different offsets based on
 *    some criteria. It is not a requirement at the moment but provides
 *    an option for future use.
 *  size: size of the batch in DWORDS
 */
struct i915_ctx_workarounds {
	struct i915_wa_ctx_bb {
		u32 offset;
		u32 size;
	} indirect_ctx, per_ctx;
	struct i915_vma *vma;
};

struct i915_request;

#define I915_MAX_VCS 4
#define I915_MAX_VECS 2

/*
 * Engine IDs definitions.
 * Keep instances of the same type engine together.
 */
enum intel_engine_id {
	RCS = 0,
	BCS,
	VCS,
	VCS2,
	VCS3,
	VCS4,
#define _VCS(n) (VCS + (n))
	VECS,
	VECS2
#define _VECS(n) (VECS + (n))
};

struct i915_priolist {
	struct list_head requests[I915_PRIORITY_COUNT];
	struct rb_node node;
	unsigned long used;
	int priority;
};

#define priolist_for_each_request(it, plist, idx) \
	for (idx = 0; idx < ARRAY_SIZE((plist)->requests); idx++) \
		list_for_each_entry(it, &(plist)->requests[idx], sched.link)

#define priolist_for_each_request_consume(it, n, plist, idx) \
	for (; (idx = ffs((plist)->used)); (plist)->used &= ~BIT(idx - 1)) \
		list_for_each_entry_safe(it, n, \
					 &(plist)->requests[idx - 1], \
					 sched.link)

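/*
 * Illustrative sketch (hypothetical helper, not part of the driver API):
 * walking every request queued in an i915_priolist, bucket by bucket, with
 * priolist_for_each_request(). The consuming variant above additionally
 * clears each bucket's bit in @used as the bucket is drained.
 */
static inline unsigned int example_priolist_count(struct i915_priolist *plist)
{
	struct i915_request *rq;
	unsigned int count = 0;
	int idx;

	priolist_for_each_request(rq, plist, idx)
		count++;

	return count;
}
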
struct st_preempt_hang {
	struct completion completion;
	bool inject_hang;
};

/**
 * struct intel_engine_execlists - execlist submission queue and port state
 *
 * The struct intel_engine_execlists represents the combined logical state of
 * the driver and the hardware state for execlist mode of submission.
 */
struct intel_engine_execlists {
	/**
	 * @tasklet: softirq tasklet for bottom handler
	 */
	struct tasklet_struct tasklet;

	/**
	 * @default_priolist: priority list for I915_PRIORITY_NORMAL
	 */
	struct i915_priolist default_priolist;

	/**
	 * @no_priolist: priority lists disabled
	 */
	bool no_priolist;

	/**
	 * @submit_reg: gen-specific execlist submission register
	 * set to the ExecList Submission Port (elsp) register pre-Gen11 and to
	 * the ExecList Submission Queue Contents register array for Gen11+
	 */
	u32 __iomem *submit_reg;

	/**
	 * @ctrl_reg: the enhanced execlists control register, used to load the
	 * submit queue on the HW and to request preemptions to idle
	 */
	u32 __iomem *ctrl_reg;

	/**
	 * @port: execlist port states
	 *
	 * For each hardware ELSP (ExecList Submission Port) we keep
	 * track of the last request and the number of times we submitted
	 * that port to hw. We then count the number of times the hw reports
	 * a context completion or preemption. As only one context can
	 * be active on hw, we limit resubmission of context to port[0]. This
	 * is called a Lite Restore of the context.
	 */
	struct execlist_port {
		/**
		 * @request_count: combined request and submission count
		 */
		struct i915_request *request_count;
#define EXECLIST_COUNT_BITS 2
#define port_request(p) ptr_mask_bits((p)->request_count, EXECLIST_COUNT_BITS)
#define port_count(p) ptr_unmask_bits((p)->request_count, EXECLIST_COUNT_BITS)
#define port_pack(rq, count) ptr_pack_bits(rq, count, EXECLIST_COUNT_BITS)
#define port_unpack(p, count) ptr_unpack_bits((p)->request_count, count, EXECLIST_COUNT_BITS)
#define port_set(p, packed) ((p)->request_count = (packed))
#define port_isset(p) ((p)->request_count)
#define port_index(p, execlists) ((p) - (execlists)->port)

		/**
		 * @context_id: context ID for port
		 */
		GEM_DEBUG_DECL(u32 context_id);

#define EXECLIST_MAX_PORTS 2
	} port[EXECLIST_MAX_PORTS];

	/**
	 * @active: is the HW active? We consider the HW as active after
	 * submitting any context for execution and until we have seen the
	 * last context completion event. After that, we do not expect any
	 * more events until we submit, and so can park the HW.
	 *
	 * As we have a small number of different sources from which we feed
	 * the HW, we track the state of each inside a single bitfield.
	 */
	unsigned int active;
#define EXECLISTS_ACTIVE_USER 0
#define EXECLISTS_ACTIVE_PREEMPT 1
#define EXECLISTS_ACTIVE_HWACK 2

	/**
	 * @port_mask: number of execlist ports - 1
	 */
	unsigned int port_mask;

	/**
	 * @queue_priority_hint: Highest pending priority.
	 *
	 * When we add requests into the queue, or adjust the priority of
	 * executing requests, we compute the maximum priority of those
	 * pending requests. We can then use this value to determine if
	 * we need to preempt the executing requests to service the queue.
	 * However, since we may have recorded the priority of an inflight
	 * request we wanted to preempt but which has since completed, at the
	 * time of dequeuing the priority hint may no longer match the highest
	 * available request priority.
	 */
	int queue_priority_hint;

	/**
	 * @queue: queue of requests, in priority lists
	 */
	struct rb_root_cached queue;

	/**
	 * @csb_write: control register for Context Switch buffer
	 *
	 * Note this register may be either mmio or HWSP shadow.
	 */
	u32 *csb_write;

	/**
	 * @csb_status: status array for Context Switch buffer
	 *
	 * Note these registers may be either mmio or HWSP shadow.
	 */
	u32 *csb_status;

	/**
	 * @preempt_complete_status: expected CSB upon completing preemption
	 */
	u32 preempt_complete_status;

	/**
	 * @csb_head: context status buffer head
	 */
	u8 csb_head;

	I915_SELFTEST_DECLARE(struct st_preempt_hang preempt_hang;)
};

#define INTEL_ENGINE_CS_MAX_NAME 8

struct intel_engine_cs {
	struct drm_i915_private *i915;
	char name[INTEL_ENGINE_CS_MAX_NAME];

	enum intel_engine_id id;
	unsigned int hw_id;
	unsigned int guc_id;

	u8 uabi_id;
	u8 uabi_class;

	u8 class;
	u8 instance;
	u32 context_size;
	u32 mmio_base;

	struct intel_ring *buffer;

	struct i915_timeline timeline;

	struct drm_i915_gem_object *default_state;
	void *pinned_default_state;

	/* Rather than have every client wait upon all user interrupts,
	 * with the herd waking after every interrupt and each doing the
	 * heavyweight seqno dance, we delegate the task (of being the
	 * bottom-half of the user interrupt) to the first client. After
	 * every interrupt, we wake up one client, who does the heavyweight
	 * coherent seqno read and either goes back to sleep (if incomplete),
	 * or wakes up all the completed clients in parallel, before then
	 * transferring the bottom-half status to the next client in the queue.
	 *
	 * Compared to walking the entire list of waiters in a single dedicated
	 * bottom-half, we reduce the latency of the first waiter by avoiding
	 * a context switch, but incur additional coherent seqno reads when
	 * following the chain of request breadcrumbs. Since it is most likely
	 * that we have a single client waiting on each seqno, then reducing
	 * the overhead of waking that client is much preferred.
	 */
	struct intel_breadcrumbs {
		spinlock_t irq_lock; /* protects irq_*; irqsafe */
		struct intel_wait *irq_wait; /* oldest waiter by retirement */

		spinlock_t rb_lock; /* protects the rb and wraps irq_lock */
		struct rb_root waiters; /* sorted by retirement, priority */
		struct list_head signals; /* sorted by retirement */
		struct task_struct *signaler; /* used for fence signalling */

		struct timer_list fake_irq; /* used after a missed interrupt */
		struct timer_list hangcheck; /* detect missed interrupts */

		unsigned int hangcheck_interrupts;
		unsigned int irq_enabled;
		unsigned int irq_count;

		bool irq_armed : 1;
	} breadcrumbs;

	struct {
		/**
		 * @enable: Bitmask of enable sample events on this engine.
		 *
		 * Bits correspond to sample event types, for instance
		 * I915_SAMPLE_QUEUED is bit 0 etc.
		 */
		u32 enable;
		/**
		 * @enable_count: Reference count for the enabled samplers.
		 *
		 * Index number corresponds to the bit number from @enable.
		 */
		unsigned int enable_count[I915_PMU_SAMPLE_BITS];
		/**
		 * @sample: Counter values for sampling events.
		 *
		 * Our internal timer stores the current counters in this field.
		 */
#define I915_ENGINE_SAMPLE_MAX (I915_SAMPLE_SEMA + 1)
		struct i915_pmu_sample sample[I915_ENGINE_SAMPLE_MAX];
	} pmu;

	/*
	 * A pool of objects to use as shadow copies of client batch buffers
	 * when the command parser is enabled. Prevents the client from
	 * modifying the batch contents after software parsing.
	 */
	struct i915_gem_batch_pool batch_pool;

	struct intel_hw_status_page status_page;
	struct i915_ctx_workarounds wa_ctx;
	struct i915_wa_list ctx_wa_list;
	struct i915_wa_list wa_list;
	struct i915_wa_list whitelist;

	u32 irq_keep_mask; /* always keep these interrupts */
	u32 irq_enable_mask; /* bitmask to enable ring interrupt */
	void (*irq_enable)(struct intel_engine_cs *engine);
	void (*irq_disable)(struct intel_engine_cs *engine);

	int (*init_hw)(struct intel_engine_cs *engine);

	struct {
		void (*prepare)(struct intel_engine_cs *engine);
		void (*reset)(struct intel_engine_cs *engine, bool stalled);
		void (*finish)(struct intel_engine_cs *engine);
	} reset;

	void (*park)(struct intel_engine_cs *engine);
	void (*unpark)(struct intel_engine_cs *engine);

	void (*set_default_submission)(struct intel_engine_cs *engine);

	struct intel_context *(*context_pin)(struct intel_engine_cs *engine,
					     struct i915_gem_context *ctx);

	int (*request_alloc)(struct i915_request *rq);
	int (*init_context)(struct i915_request *rq);

	int (*emit_flush)(struct i915_request *request, u32 mode);
#define EMIT_INVALIDATE BIT(0)
#define EMIT_FLUSH BIT(1)
#define EMIT_BARRIER (EMIT_INVALIDATE | EMIT_FLUSH)
	int (*emit_bb_start)(struct i915_request *rq,
			     u64 offset, u32 length,
			     unsigned int dispatch_flags);
#define I915_DISPATCH_SECURE BIT(0)
#define I915_DISPATCH_PINNED BIT(1)
	int (*emit_init_breadcrumb)(struct i915_request *rq);
	u32 *(*emit_fini_breadcrumb)(struct i915_request *rq,
				     u32 *cs);
	unsigned int emit_fini_breadcrumb_dw;

	/* Pass the request to the hardware queue (e.g. directly into
	 * the legacy ringbuffer or to the end of an execlist).
	 *
	 * This is called from an atomic context with irqs disabled; must
	 * be irq safe.
	 */
	void (*submit_request)(struct i915_request *rq);

	/*
	 * Call when the priority on a request has changed and it and its
	 * dependencies may need rescheduling. Note the request itself may
	 * not be ready to run!
	 */
	void (*schedule)(struct i915_request *request,
			 const struct i915_sched_attr *attr);

	/*
	 * Cancel all requests on the hardware, or queued for execution.
	 * This should only cancel the ready requests that have been
	 * submitted to the engine (via the engine->submit_request callback).
	 * This is called when marking the device as wedged.
	 */
	void (*cancel_requests)(struct intel_engine_cs *engine);

	void (*cleanup)(struct intel_engine_cs *engine);

	struct intel_engine_execlists execlists;

	/* Contexts are pinned whilst they are active on the GPU. The last
	 * context executed remains active whilst the GPU is idle - the
	 * switch away and write to the context object only occurs on the
	 * next execution. Contexts are only unpinned on retirement of the
	 * following request ensuring that we can always write to the object
	 * on the context switch even after idling. Across suspend, we switch
	 * to the kernel context and trash it as the save may not happen
	 * before the hardware is powered down.
	 */
	struct intel_context *last_retired_context;

	/* status_notifier: list of callbacks for context-switch changes */
	struct atomic_notifier_head context_status_notifier;

	struct intel_engine_hangcheck hangcheck;

#define I915_ENGINE_NEEDS_CMD_PARSER BIT(0)
#define I915_ENGINE_SUPPORTS_STATS BIT(1)
#define I915_ENGINE_HAS_PREEMPTION BIT(2)
	unsigned int flags;

	/*
	 * Table of commands the command parser needs to know about
	 * for this engine.
	 */
	DECLARE_HASHTABLE(cmd_hash, I915_CMD_HASH_ORDER);

	/*
	 * Table of registers allowed in commands that read/write registers.
	 */
	const struct drm_i915_reg_table *reg_tables;
	int reg_table_count;

	/*
	 * Returns the bitmask for the length field of the specified command.
	 * Return 0 for an unrecognized/invalid command.
	 *
	 * If the command parser finds an entry for a command in the engine's
	 * cmd_tables, it gets the command's length based on the table entry.
	 * If not, it calls this function to determine the per-engine length
	 * field encoding for the command (i.e. different opcode ranges use
	 * certain bits to encode the command length in the header).
	 */
	u32 (*get_cmd_length_mask)(u32 cmd_header);

	struct {
		/**
		 * @lock: Lock protecting the below fields.
		 */
		seqlock_t lock;
		/**
		 * @enabled: Reference count indicating number of listeners.
		 */
		unsigned int enabled;
		/**
		 * @active: Number of contexts currently scheduled in.
		 */
		unsigned int active;
		/**
		 * @enabled_at: Timestamp when busy stats were enabled.
		 */
		ktime_t enabled_at;
		/**
		 * @start: Timestamp of the last idle to active transition.
		 *
		 * Idle is defined as active == 0, active is active > 0.
		 */
		ktime_t start;
		/**
		 * @total: Total time this engine was busy.
		 *
		 * Accumulated time not counting the most recent block in cases
		 * where engine is currently busy (active > 0).
		 */
		ktime_t total;
	} stats;
};

static inline bool
intel_engine_needs_cmd_parser(const struct intel_engine_cs *engine)
{
	return engine->flags & I915_ENGINE_NEEDS_CMD_PARSER;
}

static inline bool
intel_engine_supports_stats(const struct intel_engine_cs *engine)
{
	return engine->flags & I915_ENGINE_SUPPORTS_STATS;
}

static inline bool
intel_engine_has_preemption(const struct intel_engine_cs *engine)
{
	return engine->flags & I915_ENGINE_HAS_PREEMPTION;
}

static inline bool __execlists_need_preempt(int prio, int last)
{
	return prio > max(0, last);
}

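/*
 * Illustrative sketch (hypothetical helper, not part of the driver API):
 * how the queue_priority_hint is meant to be consumed -- compare it against
 * the priority of the last request submitted to the ELSP and decide whether
 * the submission tasklet needs to preempt. The authoritative check lives in
 * the execlists dequeue path in intel_lrc.c.
 */
static inline bool
example_need_preempt(const struct intel_engine_execlists *execlists,
		     int last_prio)
{
	return __execlists_need_preempt(execlists->queue_priority_hint,
					last_prio);
}
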
static inline void
execlists_set_active(struct intel_engine_execlists *execlists,
		     unsigned int bit)
{
	__set_bit(bit, (unsigned long *)&execlists->active);
}

static inline bool
execlists_set_active_once(struct intel_engine_execlists *execlists,
			  unsigned int bit)
{
	return !__test_and_set_bit(bit, (unsigned long *)&execlists->active);
}

static inline void
execlists_clear_active(struct intel_engine_execlists *execlists,
		       unsigned int bit)
{
	__clear_bit(bit, (unsigned long *)&execlists->active);
}

static inline void
execlists_clear_all_active(struct intel_engine_execlists *execlists)
{
	execlists->active = 0;
}

static inline bool
execlists_is_active(const struct intel_engine_execlists *execlists,
		    unsigned int bit)
{
	return test_bit(bit, (unsigned long *)&execlists->active);
}

void execlists_user_begin(struct intel_engine_execlists *execlists,
			  const struct execlist_port *port);
void execlists_user_end(struct intel_engine_execlists *execlists);

void
execlists_cancel_port_requests(struct intel_engine_execlists * const execlists);

void
execlists_unwind_incomplete_requests(struct intel_engine_execlists *execlists);

static inline unsigned int
execlists_num_ports(const struct intel_engine_execlists * const execlists)
{
	return execlists->port_mask + 1;
}

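/*
 * Illustrative sketch (hypothetical helper, not part of the driver API):
 * counting how many of the engine's ELSP ports currently carry a submitted
 * request, using execlists_num_ports() and the port_isset() accessor above.
 */
static inline unsigned int
example_count_busy_ports(const struct intel_engine_execlists *execlists)
{
	unsigned int n, busy = 0;

	for (n = 0; n < execlists_num_ports(execlists); n++)
		if (port_isset(&execlists->port[n]))
			busy++;

	return busy;
}
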
static inline struct execlist_port *
execlists_port_complete(struct intel_engine_execlists * const execlists,
			struct execlist_port * const port)
{
	const unsigned int m = execlists->port_mask;

	GEM_BUG_ON(port_index(port, execlists) != 0);
	GEM_BUG_ON(!execlists_is_active(execlists, EXECLISTS_ACTIVE_USER));

	memmove(port, port + 1, m * sizeof(struct execlist_port));
	memset(port + m, 0, sizeof(struct execlist_port));

	return port;
}

static inline unsigned int
intel_engine_flag(const struct intel_engine_cs *engine)
{
	return BIT(engine->id);
}

static inline u32
intel_read_status_page(const struct intel_engine_cs *engine, int reg)
{
	/* Ensure that the compiler doesn't optimize away the load. */
	return READ_ONCE(engine->status_page.addr[reg]);
}

static inline void
intel_write_status_page(struct intel_engine_cs *engine, int reg, u32 value)
{
	/* Writing into the status page should be done sparingly. Since
	 * we do so only when we are uncertain of the device state, we take
	 * a bit of extra paranoia to try and ensure that the HWS takes the
	 * value we give and that it doesn't end up trapped inside the CPU!
	 */
	if (static_cpu_has(X86_FEATURE_CLFLUSH)) {
		mb();
		clflush(&engine->status_page.addr[reg]);
		engine->status_page.addr[reg] = value;
		clflush(&engine->status_page.addr[reg]);
		mb();
	} else {
		WRITE_ONCE(engine->status_page.addr[reg], value);
	}
}

/*
 * Reads a dword out of the status page, which is written to from the command
 * queue by automatic updates, MI_REPORT_HEAD, MI_STORE_DATA_INDEX, or
 * MI_STORE_DATA_IMM.
 *
 * The following dwords have a reserved meaning:
 * 0x00: ISR copy, updated when an ISR bit not set in the HWSTAM changes.
 * 0x04: ring 0 head pointer
 * 0x05: ring 1 head pointer (915-class)
 * 0x06: ring 2 head pointer (915-class)
 * 0x10-0x1b: Context status DWords (GM45)
 * 0x1f: Last written status offset. (GM45)
 * 0x20-0x2f: Reserved (Gen6+)
 *
 * The area from dword 0x30 to 0x3ff is available for driver usage.
 */
#define I915_GEM_HWS_INDEX 0x30
#define I915_GEM_HWS_INDEX_ADDR (I915_GEM_HWS_INDEX * sizeof(u32))
#define I915_GEM_HWS_PREEMPT 0x32
#define I915_GEM_HWS_PREEMPT_ADDR (I915_GEM_HWS_PREEMPT * sizeof(u32))
#define I915_GEM_HWS_SEQNO 0x40
#define I915_GEM_HWS_SEQNO_ADDR (I915_GEM_HWS_SEQNO * sizeof(u32))
#define I915_GEM_HWS_SCRATCH 0x80
#define I915_GEM_HWS_SCRATCH_ADDR (I915_GEM_HWS_SCRATCH * sizeof(u32))

#define I915_HWS_CSB_BUF0_INDEX 0x10
#define I915_HWS_CSB_WRITE_INDEX 0x1f
#define CNL_HWS_CSB_WRITE_INDEX 0x2f

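/*
 * Illustrative sketch (hypothetical helper, not part of the driver API):
 * the dword indices above are used with intel_read_status_page() /
 * intel_write_status_page(), e.g. to sample the last breadcrumb the GPU
 * wrote into the status page. The driver's real accessor for that slot is
 * intel_engine_get_seqno() further down.
 */
static inline u32
example_peek_hws_breadcrumb(const struct intel_engine_cs *engine)
{
	return intel_read_status_page(engine, I915_GEM_HWS_INDEX);
}
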
struct intel_ring *
intel_engine_create_ring(struct intel_engine_cs *engine,
			 struct i915_timeline *timeline,
			 int size);
int intel_ring_pin(struct intel_ring *ring);
void intel_ring_reset(struct intel_ring *ring, u32 tail);
unsigned int intel_ring_update_space(struct intel_ring *ring);
void intel_ring_unpin(struct intel_ring *ring);
void intel_ring_free(struct intel_ring *ring);

void intel_engine_stop(struct intel_engine_cs *engine);
void intel_engine_cleanup(struct intel_engine_cs *engine);

void intel_legacy_submission_resume(struct drm_i915_private *dev_priv);

int __must_check intel_ring_cacheline_align(struct i915_request *rq);

u32 __must_check *intel_ring_begin(struct i915_request *rq, unsigned int n);

static inline void intel_ring_advance(struct i915_request *rq, u32 *cs)
{
	/* Dummy function.
	 *
	 * This serves as a placeholder in the code so that the reader
	 * can compare against the preceding intel_ring_begin() and
	 * check that the number of dwords emitted matches the space
	 * reserved for the command packet (i.e. the value passed to
	 * intel_ring_begin()).
	 */
	GEM_BUG_ON((rq->ring->vaddr + rq->ring->emit) != cs);
}

static inline u32 intel_ring_wrap(const struct intel_ring *ring, u32 pos)
{
	return pos & (ring->size - 1);
}

static inline bool
intel_ring_offset_valid(const struct intel_ring *ring,
			unsigned int pos)
{
	if (pos & -ring->size) /* must be strictly within the ring */
		return false;

	if (!IS_ALIGNED(pos, 8)) /* must be qword aligned */
		return false;

	return true;
}

static inline u32 intel_ring_offset(const struct i915_request *rq, void *addr)
{
	/* Don't write ring->size (equivalent to 0) as that hangs some GPUs. */
	u32 offset = addr - rq->ring->vaddr;
	GEM_BUG_ON(offset > rq->ring->size);
	return intel_ring_wrap(rq->ring, offset);
}

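/*
 * Illustrative sketch (hypothetical helper, not part of the driver API):
 * the intel_ring_begin()/intel_ring_advance() pairing described above. We
 * reserve a fixed number of dwords, emit exactly that many, and let
 * intel_ring_advance() verify the emitted length matches the reservation.
 */
static inline int example_emit_two_noops(struct i915_request *rq)
{
	u32 *cs;

	cs = intel_ring_begin(rq, 2);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	*cs++ = MI_NOOP;
	*cs++ = MI_NOOP;
	intel_ring_advance(rq, cs);

	return 0;
}
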
static inline void
assert_ring_tail_valid(const struct intel_ring *ring, unsigned int tail)
{
	GEM_BUG_ON(!intel_ring_offset_valid(ring, tail));

	/*
	 * "Ring Buffer Use"
	 *	Gen2 BSpec "1. Programming Environment" / 1.4.4.6
	 *	Gen3 BSpec "1c Memory Interface Functions" / 2.3.4.5
	 *	Gen4+ BSpec "1c Memory Interface and Command Stream" / 5.3.4.5
	 * "If the Ring Buffer Head Pointer and the Tail Pointer are on the
	 * same cacheline, the Head Pointer must not be greater than the Tail
	 * Pointer."
	 *
	 * We use ring->head as the last known location of the actual RING_HEAD,
	 * it may have advanced but in the worst case it is equal to ring->head
	 * and so we should never program RING_TAIL to advance into the same
	 * cacheline as ring->head.
	 */
#define cacheline(a) round_down(a, CACHELINE_BYTES)
	GEM_BUG_ON(cacheline(tail) == cacheline(ring->head) &&
		   tail < ring->head);
#undef cacheline
}

static inline unsigned int
intel_ring_set_tail(struct intel_ring *ring, unsigned int tail)
{
	/* Whilst writes to the tail are strictly ordered, there is no
	 * serialisation between readers and the writers. The tail may be
	 * read by i915_request_retire() just as it is being updated
	 * by execlists, as although the breadcrumb is complete, the context
	 * switch hasn't been seen.
	 */
	assert_ring_tail_valid(ring, tail);
	ring->tail = tail;
	return tail;
}

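/*
 * Illustrative sketch (hypothetical helper, not part of the driver API):
 * publishing a new software tail with intel_ring_set_tail() before it is
 * written to RING_TAIL. The wrap keeps the value inside the ring, and the
 * assertion above enforces the "head and tail must not share a cacheline"
 * rule quoted from the BSpec.
 */
static inline u32 example_publish_tail(struct intel_ring *ring, u32 new_tail)
{
	return intel_ring_set_tail(ring, intel_ring_wrap(ring, new_tail));
}
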
void intel_engine_write_global_seqno(struct intel_engine_cs *engine, u32 seqno);

int intel_engine_setup_common(struct intel_engine_cs *engine);
int intel_engine_init_common(struct intel_engine_cs *engine);
void intel_engine_cleanup_common(struct intel_engine_cs *engine);

int intel_init_render_ring_buffer(struct intel_engine_cs *engine);
int intel_init_bsd_ring_buffer(struct intel_engine_cs *engine);
int intel_init_blt_ring_buffer(struct intel_engine_cs *engine);
int intel_init_vebox_ring_buffer(struct intel_engine_cs *engine);

int intel_engine_stop_cs(struct intel_engine_cs *engine);
void intel_engine_cancel_stop_cs(struct intel_engine_cs *engine);

void intel_engine_set_hwsp_writemask(struct intel_engine_cs *engine, u32 mask);

u64 intel_engine_get_active_head(const struct intel_engine_cs *engine);
u64 intel_engine_get_last_batch_head(const struct intel_engine_cs *engine);

static inline u32 intel_engine_last_submit(struct intel_engine_cs *engine)
{
	/*
	 * We are only peeking at the tail of the submit queue (and not the
	 * queue itself) in order to gain a hint as to the current active
	 * state of the engine. Callers are not expected to be taking
	 * engine->timeline->lock, nor are they expected to be concerned
	 * with serialising this hint with anything, so document it as
	 * a hint and nothing more.
	 */
	return READ_ONCE(engine->timeline.seqno);
}

static inline u32 intel_engine_get_seqno(struct intel_engine_cs *engine)
{
	return intel_read_status_page(engine, I915_GEM_HWS_INDEX);
}

static inline bool intel_engine_signaled(struct intel_engine_cs *engine,
					 u32 seqno)
{
	return i915_seqno_passed(intel_engine_get_seqno(engine), seqno);
}

static inline bool intel_engine_has_completed(struct intel_engine_cs *engine,
					      u32 seqno)
{
	GEM_BUG_ON(!seqno);
	return intel_engine_signaled(engine, seqno);
}

static inline bool intel_engine_has_started(struct intel_engine_cs *engine,
					    u32 seqno)
{
	GEM_BUG_ON(!seqno);
	return intel_engine_signaled(engine, seqno - 1);
}

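/*
 * Illustrative sketch (hypothetical helper, not part of the driver API):
 * combining the two hints above. An engine has (roughly) caught up when the
 * breadcrumb in the status page has passed the last submitted seqno; the
 * authoritative idleness check is intel_engine_is_idle().
 */
static inline bool example_engine_caught_up(struct intel_engine_cs *engine)
{
	return intel_engine_signaled(engine, intel_engine_last_submit(engine));
}
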
void intel_engine_get_instdone(struct intel_engine_cs *engine,
			       struct intel_instdone *instdone);

/* intel_breadcrumbs.c -- user interrupt bottom-half for waiters */
int intel_engine_init_breadcrumbs(struct intel_engine_cs *engine);

static inline void intel_wait_init(struct intel_wait *wait)
{
	wait->tsk = current;
	wait->request = NULL;
}

static inline void intel_wait_init_for_seqno(struct intel_wait *wait, u32 seqno)
{
	wait->tsk = current;
	wait->seqno = seqno;
}

static inline bool intel_wait_has_seqno(const struct intel_wait *wait)
{
	return wait->seqno;
}

static inline bool
intel_wait_update_seqno(struct intel_wait *wait, u32 seqno)
{
	wait->seqno = seqno;
	return intel_wait_has_seqno(wait);
}

static inline bool
intel_wait_update_request(struct intel_wait *wait,
			  const struct i915_request *rq)
{
	return intel_wait_update_seqno(wait, i915_request_global_seqno(rq));
}

static inline bool
intel_wait_check_seqno(const struct intel_wait *wait, u32 seqno)
{
	return wait->seqno == seqno;
}

static inline bool
intel_wait_check_request(const struct intel_wait *wait,
			 const struct i915_request *rq)
{
	return intel_wait_check_seqno(wait, i915_request_global_seqno(rq));
}

static inline bool intel_wait_complete(const struct intel_wait *wait)
{
	return RB_EMPTY_NODE(&wait->node);
}

bool intel_engine_add_wait(struct intel_engine_cs *engine,
			   struct intel_wait *wait);
void intel_engine_remove_wait(struct intel_engine_cs *engine,
			      struct intel_wait *wait);
bool intel_engine_enable_signaling(struct i915_request *request, bool wakeup);
void intel_engine_cancel_signaling(struct i915_request *request);

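/*
 * Illustrative sketch (hypothetical helper, heavily simplified): the
 * add/remove dance a waiter performs around the breadcrumbs tree. A real
 * waiter also sets its task state and sleeps between checks; see
 * i915_request_wait() for the driver's actual wait loop.
 */
static inline bool example_check_seqno_once(struct intel_engine_cs *engine,
					    u32 seqno)
{
	struct intel_wait wait;
	bool signaled;

	intel_wait_init_for_seqno(&wait, seqno);

	/* The first waiter enables the user interrupt and is the bottom-half. */
	intel_engine_add_wait(engine, &wait);
	signaled = intel_engine_signaled(engine, seqno);
	intel_engine_remove_wait(engine, &wait);

	return signaled;
}
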
static inline bool intel_engine_has_waiter(const struct intel_engine_cs *engine)
{
	return READ_ONCE(engine->breadcrumbs.irq_wait);
}

unsigned int intel_engine_wakeup(struct intel_engine_cs *engine);
#define ENGINE_WAKEUP_WAITER BIT(0)
#define ENGINE_WAKEUP_ASLEEP BIT(1)

void intel_engine_pin_breadcrumbs_irq(struct intel_engine_cs *engine);
void intel_engine_unpin_breadcrumbs_irq(struct intel_engine_cs *engine);

void __intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine);
void intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine);

void intel_engine_reset_breadcrumbs(struct intel_engine_cs *engine);
void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine);

static inline u32 *gen8_emit_pipe_control(u32 *batch, u32 flags, u32 offset)
{
	memset(batch, 0, 6 * sizeof(u32));

	batch[0] = GFX_OP_PIPE_CONTROL(6);
	batch[1] = flags;
	batch[2] = offset;

	return batch + 6;
}

static inline u32 *
gen8_emit_ggtt_write_rcs(u32 *cs, u32 value, u32 gtt_offset, u32 flags)
{
	/* We're using qword write, offset should be aligned to 8 bytes. */
	GEM_BUG_ON(!IS_ALIGNED(gtt_offset, 8));

	/* w/a: for post-sync ops following a GPGPU operation we
	 * need a prior CS_STALL, which is emitted by the flush
	 * following the batch.
	 */
	*cs++ = GFX_OP_PIPE_CONTROL(6);
	*cs++ = flags | PIPE_CONTROL_QW_WRITE | PIPE_CONTROL_GLOBAL_GTT_IVB;
	*cs++ = gtt_offset;
	*cs++ = 0;
	*cs++ = value;
	/* We're thrashing one dword of HWS. */
	*cs++ = 0;

	return cs;
}

static inline u32 *
gen8_emit_ggtt_write(u32 *cs, u32 value, u32 gtt_offset)
{
	/* w/a: bit 5 needs to be zero for MI_FLUSH_DW address. */
	GEM_BUG_ON(gtt_offset & (1 << 5));
	/* Offset should be aligned to 8 bytes for both (QW/DW) write types */
	GEM_BUG_ON(!IS_ALIGNED(gtt_offset, 8));

	*cs++ = (MI_FLUSH_DW + 1) | MI_FLUSH_DW_OP_STOREDW;
	*cs++ = gtt_offset | MI_FLUSH_DW_USE_GTT;
	*cs++ = 0;
	*cs++ = value;

	return cs;
}

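/*
 * Illustrative sketch (hypothetical helper, not part of the driver API):
 * using gen8_emit_ggtt_write() to post a value into one of the status-page
 * dwords defined earlier. The hws_ggtt_address parameter (the GGTT address
 * of the status page) is an assumption of this sketch; the real breadcrumb
 * emission lives in intel_lrc.c.
 */
static inline u32 *
example_emit_hws_seqno(u32 *cs, u32 seqno, u32 hws_ggtt_address)
{
	return gen8_emit_ggtt_write(cs, seqno,
				    hws_ggtt_address + I915_GEM_HWS_SEQNO_ADDR);
}
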
static inline void intel_engine_reset(struct intel_engine_cs *engine,
				      bool stalled)
{
	if (engine->reset.reset)
		engine->reset.reset(engine, stalled);
}

void intel_engines_sanitize(struct drm_i915_private *i915, bool force);

bool intel_engine_is_idle(struct intel_engine_cs *engine);
bool intel_engines_are_idle(struct drm_i915_private *dev_priv);

bool intel_engine_has_kernel_context(const struct intel_engine_cs *engine);
void intel_engine_lost_context(struct intel_engine_cs *engine);

void intel_engines_park(struct drm_i915_private *i915);
void intel_engines_unpark(struct drm_i915_private *i915);

void intel_engines_reset_default_submission(struct drm_i915_private *i915);
unsigned int intel_engines_has_context_isolation(struct drm_i915_private *i915);

bool intel_engine_can_store_dword(struct intel_engine_cs *engine);

__printf(3, 4)
void intel_engine_dump(struct intel_engine_cs *engine,
		       struct drm_printer *m,
		       const char *header, ...);

struct intel_engine_cs *
intel_engine_lookup_user(struct drm_i915_private *i915, u8 class, u8 instance);

static inline void intel_engine_context_in(struct intel_engine_cs *engine)
{
	unsigned long flags;

	if (READ_ONCE(engine->stats.enabled) == 0)
		return;

	write_seqlock_irqsave(&engine->stats.lock, flags);

	if (engine->stats.enabled > 0) {
		if (engine->stats.active++ == 0)
			engine->stats.start = ktime_get();
		GEM_BUG_ON(engine->stats.active == 0);
	}

	write_sequnlock_irqrestore(&engine->stats.lock, flags);
}

static inline void intel_engine_context_out(struct intel_engine_cs *engine)
{
	unsigned long flags;

	if (READ_ONCE(engine->stats.enabled) == 0)
		return;

	write_seqlock_irqsave(&engine->stats.lock, flags);

	if (engine->stats.enabled > 0) {
		ktime_t last;

		if (engine->stats.active && --engine->stats.active == 0) {
			/*
			 * Decrement the active context count and in case GPU
			 * is now idle add up to the running total.
			 */
			last = ktime_sub(ktime_get(), engine->stats.start);

			engine->stats.total = ktime_add(engine->stats.total,
							last);
		} else if (engine->stats.active == 0) {
			/*
			 * After turning on engine stats, context out might be
			 * the first event in which case we account from the
			 * time stats gathering was turned on.
			 */
			last = ktime_sub(ktime_get(), engine->stats.enabled_at);

			engine->stats.total = ktime_add(engine->stats.total,
							last);
		}
	}

	write_sequnlock_irqrestore(&engine->stats.lock, flags);
}

int intel_enable_engine_stats(struct intel_engine_cs *engine);
void intel_disable_engine_stats(struct intel_engine_cs *engine);

ktime_t intel_engine_get_busy_time(struct intel_engine_cs *engine);

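/*
 * Illustrative sketch (hypothetical helper, not part of the driver API):
 * the intended usage of the engine-stats interface above -- take a listener
 * reference, sample the accumulated busy time, then drop the reference.
 */
static inline ktime_t
example_sample_engine_busyness(struct intel_engine_cs *engine)
{
	ktime_t busy = 0;

	if (intel_enable_engine_stats(engine) == 0) {
		busy = intel_engine_get_busy_time(engine);
		intel_disable_engine_stats(engine);
	}

	return busy;
}
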
#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)

static inline bool inject_preempt_hang(struct intel_engine_execlists *execlists)
{
	if (!execlists->preempt_hang.inject_hang)
		return false;

	complete(&execlists->preempt_hang.completion);
	return true;
}

#else

static inline bool inject_preempt_hang(struct intel_engine_execlists *execlists)
{
	return false;
}

#endif

#endif /* _INTEL_RINGBUFFER_H_ */