/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _INTEL_RINGBUFFER_H_
#define _INTEL_RINGBUFFER_H_

#include <linux/hashtable.h>
#include "i915_gem_batch_pool.h"
#include "i915_gem_request.h"
#include "i915_gem_timeline.h"
#include "i915_pmu.h"
#include "i915_selftest.h"

struct drm_printer;

#define I915_CMD_HASH_ORDER 9

/* Early gen2 devices have a cacheline of just 32 bytes, using 64 is overkill,
 * but keeps the logic simple. Indeed, the whole purpose of this macro is just
 * to give some indication as to some of the magic values used in the various
 * workarounds!
 */
#define CACHELINE_BYTES 64
#define CACHELINE_DWORDS (CACHELINE_BYTES / sizeof(uint32_t))

struct intel_hw_status_page {
	struct i915_vma *vma;
	u32 *page_addr;
	u32 ggtt_offset;
};

#define I915_READ_TAIL(engine) I915_READ(RING_TAIL((engine)->mmio_base))
#define I915_WRITE_TAIL(engine, val) I915_WRITE(RING_TAIL((engine)->mmio_base), val)

#define I915_READ_START(engine) I915_READ(RING_START((engine)->mmio_base))
#define I915_WRITE_START(engine, val) I915_WRITE(RING_START((engine)->mmio_base), val)

#define I915_READ_HEAD(engine) I915_READ(RING_HEAD((engine)->mmio_base))
#define I915_WRITE_HEAD(engine, val) I915_WRITE(RING_HEAD((engine)->mmio_base), val)

#define I915_READ_CTL(engine) I915_READ(RING_CTL((engine)->mmio_base))
#define I915_WRITE_CTL(engine, val) I915_WRITE(RING_CTL((engine)->mmio_base), val)

#define I915_READ_IMR(engine) I915_READ(RING_IMR((engine)->mmio_base))
#define I915_WRITE_IMR(engine, val) I915_WRITE(RING_IMR((engine)->mmio_base), val)

#define I915_READ_MODE(engine) I915_READ(RING_MI_MODE((engine)->mmio_base))
#define I915_WRITE_MODE(engine, val) I915_WRITE(RING_MI_MODE((engine)->mmio_base), val)

/* seqno size is actually only a uint32, but since we plan to use MI_FLUSH_DW to
 * do the writes, and that must have qw aligned offsets, simply pretend it's 8b.
 */
enum intel_engine_hangcheck_action {
	ENGINE_IDLE = 0,
	ENGINE_WAIT,
	ENGINE_ACTIVE_SEQNO,
	ENGINE_ACTIVE_HEAD,
	ENGINE_ACTIVE_SUBUNITS,
	ENGINE_WAIT_KICK,
	ENGINE_DEAD,
};

static inline const char *
hangcheck_action_to_str(const enum intel_engine_hangcheck_action a)
{
	switch (a) {
	case ENGINE_IDLE:
		return "idle";
	case ENGINE_WAIT:
		return "wait";
	case ENGINE_ACTIVE_SEQNO:
		return "active seqno";
	case ENGINE_ACTIVE_HEAD:
		return "active head";
	case ENGINE_ACTIVE_SUBUNITS:
		return "active subunits";
	case ENGINE_WAIT_KICK:
		return "wait kick";
	case ENGINE_DEAD:
		return "dead";
	}

	return "unknown";
}

#define I915_MAX_SLICES	3
#define I915_MAX_SUBSLICES 3

#define instdone_slice_mask(dev_priv__) \
	(INTEL_GEN(dev_priv__) == 7 ? \
	 1 : INTEL_INFO(dev_priv__)->sseu.slice_mask)

#define instdone_subslice_mask(dev_priv__) \
	(INTEL_GEN(dev_priv__) == 7 ? \
	 1 : INTEL_INFO(dev_priv__)->sseu.subslice_mask)

#define for_each_instdone_slice_subslice(dev_priv__, slice__, subslice__) \
	for ((slice__) = 0, (subslice__) = 0; \
	     (slice__) < I915_MAX_SLICES; \
	     (subslice__) = ((subslice__) + 1) < I915_MAX_SUBSLICES ? (subslice__) + 1 : 0, \
	       (slice__) += ((subslice__) == 0)) \
		for_each_if((BIT(slice__) & instdone_slice_mask(dev_priv__)) && \
			    (BIT(subslice__) & instdone_subslice_mask(dev_priv__)))

struct intel_instdone {
	u32 instdone;
	/* The following exist only in the RCS engine */
	u32 slice_common;
	u32 sampler[I915_MAX_SLICES][I915_MAX_SUBSLICES];
	u32 row[I915_MAX_SLICES][I915_MAX_SUBSLICES];
};

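/*
 * Illustrative sketch (not part of the driver): walking every populated
 * slice/subslice INSTDONE slot with the iterator above. The values are
 * assumed to have been filled by intel_engine_get_instdone(), declared
 * later in this header.
 *
 *	struct intel_instdone instdone;
 *	int slice, subslice;
 *
 *	intel_engine_get_instdone(engine, &instdone);
 *	for_each_instdone_slice_subslice(dev_priv, slice, subslice)
 *		pr_debug("sampler[%d][%d] = 0x%08x\n", slice, subslice,
 *			 instdone.sampler[slice][subslice]);
 */
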
struct intel_engine_hangcheck {
	u64 acthd;
	u32 seqno;
	enum intel_engine_hangcheck_action action;
	unsigned long action_timestamp;
	int deadlock;
	struct intel_instdone instdone;
	struct drm_i915_gem_request *active_request;
	bool stalled;
};

struct intel_ring {
	struct i915_vma *vma;
	void *vaddr;

	struct list_head request_list;

	u32 head;
	u32 tail;
	u32 emit;

	u32 space;
	u32 size;
	u32 effective_size;
};

struct i915_gem_context;
struct drm_i915_reg_table;

/*
 * We use a single page to load ctx workarounds, so all of these
 * values are referred to in terms of dwords.
 *
 * struct i915_wa_ctx_bb:
 *  offset: specifies batch starting position, also helpful in case
 *    we want to have multiple batches at different offsets based on
 *    some criteria. It is not a requirement at the moment but provides
 *    an option for future use.
 *  size: size of the batch in DWORDS
 */
struct i915_ctx_workarounds {
	struct i915_wa_ctx_bb {
		u32 offset;
		u32 size;
	} indirect_ctx, per_ctx;
	struct i915_vma *vma;
};

struct drm_i915_gem_request;

/*
 * Engine IDs definitions.
 * Keep instances of the same type engine together.
 */
enum intel_engine_id {
	RCS = 0,
	BCS,
	VCS,
	VCS2,
#define _VCS(n) (VCS + (n))
	VECS
};

struct i915_priolist {
	struct rb_node node;
	struct list_head requests;
	int priority;
};

/**
 * struct intel_engine_execlists - execlist submission queue and port state
 *
 * The struct intel_engine_execlists represents the combined logical state of
 * the driver and the hardware state for execlist mode of submission.
 */
struct intel_engine_execlists {
	/**
	 * @tasklet: softirq tasklet for bottom handler
	 */
	struct tasklet_struct tasklet;

	/**
	 * @default_priolist: priority list for I915_PRIORITY_NORMAL
	 */
	struct i915_priolist default_priolist;

	/**
	 * @no_priolist: priority lists disabled
	 */
	bool no_priolist;

	/**
	 * @elsp: the ExecList Submission Port register
	 */
	u32 __iomem *elsp;

	/**
	 * @port: execlist port states
	 *
	 * For each hardware ELSP (ExecList Submission Port) we keep
	 * track of the last request and the number of times we submitted
	 * that port to hw. We then count the number of times the hw reports
	 * a context completion or preemption. As only one context can
	 * be active on hw, we limit resubmission of a context to port[0].
	 * This is called a Lite Restore of the context.
	 */
	struct execlist_port {
		/**
		 * @request_count: combined request and submission count
		 */
		struct drm_i915_gem_request *request_count;
#define EXECLIST_COUNT_BITS 2
#define port_request(p) ptr_mask_bits((p)->request_count, EXECLIST_COUNT_BITS)
#define port_count(p) ptr_unmask_bits((p)->request_count, EXECLIST_COUNT_BITS)
#define port_pack(rq, count) ptr_pack_bits(rq, count, EXECLIST_COUNT_BITS)
#define port_unpack(p, count) ptr_unpack_bits((p)->request_count, count, EXECLIST_COUNT_BITS)
#define port_set(p, packed) ((p)->request_count = (packed))
#define port_isset(p) ((p)->request_count)
#define port_index(p, execlists) ((p) - (execlists)->port)
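
		/*
		 * Illustrative sketch (assumed caller code, not part of this
		 * header): the low EXECLIST_COUNT_BITS of @request_count hold
		 * the number of times the request occupying this port has been
		 * submitted to the ELSP; the remaining bits hold the request
		 * pointer itself.
		 *
		 *	unsigned int count;
		 *	struct drm_i915_gem_request *rq = port_unpack(port, &count);
		 *
		 *	port_set(port, port_pack(rq, count + 1)); // resubmit (lite restore)
		 */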

		/**
		 * @context_id: context ID for port
		 */
		GEM_DEBUG_DECL(u32 context_id);

#define EXECLIST_MAX_PORTS 2
	} port[EXECLIST_MAX_PORTS];

	/**
	 * @active: is the HW active? We consider the HW as active after
	 * submitting any context for execution and until we have seen the
	 * last context completion event. After that, we do not expect any
	 * more events until we submit, and so can park the HW.
	 *
	 * As we have a small number of different sources from which we feed
	 * the HW, we track the state of each inside a single bitfield.
	 */
	unsigned int active;
#define EXECLISTS_ACTIVE_USER 0
#define EXECLISTS_ACTIVE_PREEMPT 1
#define EXECLISTS_ACTIVE_HWACK 2

	/**
	 * @port_mask: number of execlist ports - 1
	 */
	unsigned int port_mask;

	/**
	 * @queue: queue of requests, in priority lists
	 */
	struct rb_root queue;

	/**
	 * @first: leftmost level in priority @queue
	 */
	struct rb_node *first;

	/**
	 * @fw_domains: forcewake domains for irq tasklet
	 */
	unsigned int fw_domains;

	/**
	 * @csb_head: context status buffer head
	 */
	unsigned int csb_head;

	/**
	 * @csb_use_mmio: access csb through mmio, instead of hwsp
	 */
	bool csb_use_mmio;
};

#define INTEL_ENGINE_CS_MAX_NAME 8

struct intel_engine_cs {
	struct drm_i915_private *i915;
	char name[INTEL_ENGINE_CS_MAX_NAME];

	enum intel_engine_id id;
	unsigned int hw_id;
	unsigned int guc_id;

	u8 uabi_id;
	u8 uabi_class;

	u8 class;
	u8 instance;
	u32 context_size;
	u32 mmio_base;
	unsigned int irq_shift;

	struct intel_ring *buffer;
	struct intel_timeline *timeline;

	struct drm_i915_gem_object *default_state;

	atomic_t irq_count;
	unsigned long irq_posted;
#define ENGINE_IRQ_BREADCRUMB 0
#define ENGINE_IRQ_EXECLIST 1

	/* Rather than have every client wait upon all user interrupts,
	 * with the herd waking after every interrupt and each doing the
	 * heavyweight seqno dance, we delegate the task (of being the
	 * bottom-half of the user interrupt) to the first client. After
	 * every interrupt, we wake up one client, who does the heavyweight
	 * coherent seqno read and either goes back to sleep (if incomplete),
	 * or wakes up all the completed clients in parallel, before then
	 * transferring the bottom-half status to the next client in the queue.
	 *
	 * Compared to walking the entire list of waiters in a single dedicated
	 * bottom-half, we reduce the latency of the first waiter by avoiding
	 * a context switch, but incur additional coherent seqno reads when
	 * following the chain of request breadcrumbs. Since it is most likely
	 * that we have a single client waiting on each seqno, then reducing
	 * the overhead of waking that client is much preferred.
	 */
	struct intel_breadcrumbs {
		spinlock_t irq_lock; /* protects irq_*; irqsafe */
		struct intel_wait *irq_wait; /* oldest waiter by retirement */

		spinlock_t rb_lock; /* protects the rb and wraps irq_lock */
		struct rb_root waiters; /* sorted by retirement, priority */
		struct rb_root signals; /* sorted by retirement */
		struct task_struct *signaler; /* used for fence signalling */
		struct drm_i915_gem_request __rcu *first_signal;
		struct timer_list fake_irq; /* used after a missed interrupt */
		struct timer_list hangcheck; /* detect missed interrupts */

		unsigned int hangcheck_interrupts;
		unsigned int irq_enabled;

		bool irq_armed : 1;
		I915_SELFTEST_DECLARE(bool mock : 1);
	} breadcrumbs;

	struct {
		/**
		 * @enable: Bitmask of enabled sample events on this engine.
		 *
		 * Bits correspond to sample event types, for instance
		 * I915_SAMPLE_QUEUED is bit 0 etc.
		 */
		u32 enable;
		/**
		 * @enable_count: Reference count for the enabled samplers.
		 *
		 * Index number corresponds to the bit number from @enable.
		 */
		unsigned int enable_count[I915_PMU_SAMPLE_BITS];
		/**
		 * @sample: Counter values for sampling events.
		 *
		 * Our internal timer stores the current counters in this field.
		 */
#define I915_ENGINE_SAMPLE_MAX (I915_SAMPLE_SEMA + 1)
		struct i915_pmu_sample sample[I915_ENGINE_SAMPLE_MAX];
	} pmu;

	/*
	 * A pool of objects to use as shadow copies of client batch buffers
	 * when the command parser is enabled. Prevents the client from
	 * modifying the batch contents after software parsing.
	 */
	struct i915_gem_batch_pool batch_pool;

	struct intel_hw_status_page status_page;
	struct i915_ctx_workarounds wa_ctx;
	struct i915_vma *scratch;

	u32 irq_keep_mask; /* always keep these interrupts */
	u32 irq_enable_mask; /* bitmask to enable ring interrupt */
	void (*irq_enable)(struct intel_engine_cs *engine);
	void (*irq_disable)(struct intel_engine_cs *engine);

	int (*init_hw)(struct intel_engine_cs *engine);
	void (*reset_hw)(struct intel_engine_cs *engine,
			 struct drm_i915_gem_request *req);

	void (*park)(struct intel_engine_cs *engine);
	void (*unpark)(struct intel_engine_cs *engine);

	void (*set_default_submission)(struct intel_engine_cs *engine);

	struct intel_ring *(*context_pin)(struct intel_engine_cs *engine,
					  struct i915_gem_context *ctx);
	void (*context_unpin)(struct intel_engine_cs *engine,
			      struct i915_gem_context *ctx);
	int (*request_alloc)(struct drm_i915_gem_request *req);
	int (*init_context)(struct drm_i915_gem_request *req);

	int (*emit_flush)(struct drm_i915_gem_request *request,
			  u32 mode);
#define EMIT_INVALIDATE BIT(0)
#define EMIT_FLUSH BIT(1)
#define EMIT_BARRIER (EMIT_INVALIDATE | EMIT_FLUSH)
	int (*emit_bb_start)(struct drm_i915_gem_request *req,
			     u64 offset, u32 length,
			     unsigned int dispatch_flags);
#define I915_DISPATCH_SECURE BIT(0)
#define I915_DISPATCH_PINNED BIT(1)
#define I915_DISPATCH_RS BIT(2)
	void (*emit_breadcrumb)(struct drm_i915_gem_request *req,
				u32 *cs);
	int emit_breadcrumb_sz;

	/* Pass the request to the hardware queue (e.g. directly into
	 * the legacy ringbuffer or to the end of an execlist).
	 *
	 * This is called from an atomic context with irqs disabled; must
	 * be irq safe.
	 */
	void (*submit_request)(struct drm_i915_gem_request *req);

	/* Call when the priority on a request has changed and it and its
	 * dependencies may need rescheduling. Note the request itself may
	 * not be ready to run!
	 *
	 * Called under the struct_mutex.
	 */
	void (*schedule)(struct drm_i915_gem_request *request,
			 int priority);

	/*
	 * Cancel all requests on the hardware, or queued for execution.
	 * This should only cancel the ready requests that have been
	 * submitted to the engine (via the engine->submit_request callback).
	 * This is called when marking the device as wedged.
	 */
	void (*cancel_requests)(struct intel_engine_cs *engine);

	/* Some chipsets are not quite as coherent as advertised and need
	 * an expensive kick to force a true read of the up-to-date seqno.
	 * However, the up-to-date seqno is not always required and the last
	 * seen value is good enough. Note that the seqno will always be
	 * monotonic, even if not coherent.
	 */
	void (*irq_seqno_barrier)(struct intel_engine_cs *engine);
	void (*cleanup)(struct intel_engine_cs *engine);

	/* GEN8 signal/wait table - never trust comments!
	 *	  signal to	signal to	signal to	signal to	signal to
	 *	    RCS		   VCS		   BCS		  VECS		 VCS2
	 *      -------------------------------------------------------------------
	 *  RCS | NOP (0x00) | VCS (0x08) | BCS (0x10) | VECS (0x18) | VCS2 (0x20) |
	 *	|-------------------------------------------------------------------
	 *  VCS | RCS (0x28) | NOP (0x30) | BCS (0x38) | VECS (0x40) | VCS2 (0x48) |
	 *	|-------------------------------------------------------------------
	 *  BCS | RCS (0x50) | VCS (0x58) | NOP (0x60) | VECS (0x68) | VCS2 (0x70) |
	 *	|-------------------------------------------------------------------
	 * VECS | RCS (0x78) | VCS (0x80) | BCS (0x88) | NOP  (0x90) | VCS2 (0x98) |
	 *	|-------------------------------------------------------------------
	 * VCS2 | RCS (0xa0) | VCS (0xa8) | BCS (0xb0) | VECS (0xb8) | NOP  (0xc0) |
	 *	|-------------------------------------------------------------------
	 *
	 * Generalization:
	 *  f(x, y) := (x->id * NUM_RINGS * seqno_size) + (seqno_size * y->id)
	 *  ie. transpose of g(x, y)
	 *
	 *	 sync from	sync from	sync from	sync from	sync from
	 *	    RCS		   VCS		   BCS		  VECS		 VCS2
	 *      -------------------------------------------------------------------
	 *  RCS | NOP (0x00) | VCS (0x28) | BCS (0x50) | VECS (0x78) | VCS2 (0xa0) |
	 *	|-------------------------------------------------------------------
	 *  VCS | RCS (0x08) | NOP (0x30) | BCS (0x58) | VECS (0x80) | VCS2 (0xa8) |
	 *	|-------------------------------------------------------------------
	 *  BCS | RCS (0x10) | VCS (0x38) | NOP (0x60) | VECS (0x88) | VCS2 (0xb0) |
	 *	|-------------------------------------------------------------------
	 * VECS | RCS (0x18) | VCS (0x40) | BCS (0x68) | NOP  (0x90) | VCS2 (0xb8) |
	 *	|-------------------------------------------------------------------
	 * VCS2 | RCS (0x20) | VCS (0x48) | BCS (0x70) | VECS (0x98) | NOP  (0xc0) |
	 *	|-------------------------------------------------------------------
	 *
	 * Generalization:
	 *  g(x, y) := (y->id * NUM_RINGS * seqno_size) + (seqno_size * x->id)
	 *  ie. transpose of f(x, y)
	 */
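	/*
	 * Worked example for the tables above (editorial note; NUM_RINGS == 5
	 * and the 8-byte seqno slot size are taken from the comment itself,
	 * not from definitions in this header):
	 *
	 *	f(RCS, VCS) = (0 * 5 * 8) + (8 * 1) = 0x08
	 *	g(RCS, VCS) = (1 * 5 * 8) + (8 * 0) = 0x28
	 *
	 * i.e. RCS signals VCS at offset 0x08 and syncs from VCS at 0x28,
	 * matching the "signal to" and "sync from" entries above.
	 */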
	struct {
#define GEN6_SEMAPHORE_LAST	VECS_HW
#define GEN6_NUM_SEMAPHORES	(GEN6_SEMAPHORE_LAST + 1)
#define GEN6_SEMAPHORES_MASK	GENMASK(GEN6_SEMAPHORE_LAST, 0)
		struct {
			/* our mbox written by others */
			u32		wait[GEN6_NUM_SEMAPHORES];
			/* mboxes this ring signals to */
			i915_reg_t	signal[GEN6_NUM_SEMAPHORES];
		} mbox;

		/* AKA wait() */
		int	(*sync_to)(struct drm_i915_gem_request *req,
				   struct drm_i915_gem_request *signal);
		u32	*(*signal)(struct drm_i915_gem_request *req, u32 *cs);
	} semaphore;

	struct intel_engine_execlists execlists;

	/* Contexts are pinned whilst they are active on the GPU. The last
	 * context executed remains active whilst the GPU is idle - the
	 * switch away and write to the context object only occurs on the
	 * next execution. Contexts are only unpinned on retirement of the
	 * following request ensuring that we can always write to the object
	 * on the context switch even after idling. Across suspend, we switch
	 * to the kernel context and trash it as the save may not happen
	 * before the hardware is powered down.
	 */
	struct i915_gem_context *last_retired_context;

	/* We track the current MI_SET_CONTEXT in order to eliminate
	 * redundant context switches. This presumes that requests are not
	 * reordered! Or, when they are, the tracking is updated along with
	 * the emission of individual requests into the legacy command
	 * stream (ring).
	 */
	struct i915_gem_context *legacy_active_context;
	struct i915_hw_ppgtt *legacy_active_ppgtt;

	/* status_notifier: list of callbacks for context-switch changes */
	struct atomic_notifier_head context_status_notifier;

	struct intel_engine_hangcheck hangcheck;

#define I915_ENGINE_NEEDS_CMD_PARSER BIT(0)
#define I915_ENGINE_SUPPORTS_STATS   BIT(1)
	unsigned int flags;

	/*
	 * Table of commands the command parser needs to know about
	 * for this engine.
	 */
	DECLARE_HASHTABLE(cmd_hash, I915_CMD_HASH_ORDER);

	/*
	 * Table of registers allowed in commands that read/write registers.
	 */
	const struct drm_i915_reg_table *reg_tables;
	int reg_table_count;

	/*
	 * Returns the bitmask for the length field of the specified command.
	 * Return 0 for an unrecognized/invalid command.
	 *
	 * If the command parser finds an entry for a command in the engine's
	 * cmd_tables, it gets the command's length based on the table entry.
	 * If not, it calls this function to determine the per-engine length
	 * field encoding for the command (i.e. different opcode ranges use
	 * certain bits to encode the command length in the header).
	 */
	u32 (*get_cmd_length_mask)(u32 cmd_header);

	struct {
		/**
		 * @lock: Lock protecting the below fields.
		 */
		spinlock_t lock;
		/**
		 * @enabled: Reference count indicating number of listeners.
		 */
		unsigned int enabled;
		/**
		 * @active: Number of contexts currently scheduled in.
		 */
		unsigned int active;
		/**
		 * @enabled_at: Timestamp when busy stats were enabled.
		 */
		ktime_t enabled_at;
		/**
		 * @start: Timestamp of the last idle to active transition.
		 *
		 * Idle is defined as active == 0, active is active > 0.
		 */
		ktime_t start;
		/**
		 * @total: Total time this engine was busy.
		 *
		 * Accumulated time not counting the most recent block in cases
		 * where the engine is currently busy (active > 0).
		 */
		ktime_t total;
	} stats;
};

static inline bool intel_engine_needs_cmd_parser(struct intel_engine_cs *engine)
{
	return engine->flags & I915_ENGINE_NEEDS_CMD_PARSER;
}

static inline bool intel_engine_supports_stats(struct intel_engine_cs *engine)
{
	return engine->flags & I915_ENGINE_SUPPORTS_STATS;
}

static inline void
execlists_set_active(struct intel_engine_execlists *execlists,
		     unsigned int bit)
{
	__set_bit(bit, (unsigned long *)&execlists->active);
}

static inline void
execlists_clear_active(struct intel_engine_execlists *execlists,
		       unsigned int bit)
{
	__clear_bit(bit, (unsigned long *)&execlists->active);
}

static inline bool
execlists_is_active(const struct intel_engine_execlists *execlists,
		    unsigned int bit)
{
	return test_bit(bit, (unsigned long *)&execlists->active);
}

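/*
 * Illustrative sketch (not driver code): a submission path might mark the HW
 * busy before writing the ELSP, and the completion path clears the bit once
 * the final context-complete event leaves all ports empty, e.g.
 *
 *	execlists_set_active(execlists, EXECLISTS_ACTIVE_USER);
 *	... write ports to the ELSP ...
 *	if (!port_isset(execlists->port))
 *		execlists_clear_active(execlists, EXECLISTS_ACTIVE_USER);
 *
 * execlists_is_active(execlists, EXECLISTS_ACTIVE_USER) can then be used to
 * decide whether the engine may be parked.
 */
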
void
execlists_cancel_port_requests(struct intel_engine_execlists * const execlists);

void
execlists_unwind_incomplete_requests(struct intel_engine_execlists *execlists);

static inline unsigned int
execlists_num_ports(const struct intel_engine_execlists * const execlists)
{
	return execlists->port_mask + 1;
}

static inline void
execlists_port_complete(struct intel_engine_execlists * const execlists,
			struct execlist_port * const port)
{
	const unsigned int m = execlists->port_mask;

	GEM_BUG_ON(port_index(port, execlists) != 0);
	GEM_BUG_ON(!execlists_is_active(execlists, EXECLISTS_ACTIVE_USER));

	memmove(port, port + 1, m * sizeof(struct execlist_port));
	memset(port + m, 0, sizeof(struct execlist_port));
}

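/*
 * Illustrative sketch (assumption, not the actual CSB handler): on a
 * context-complete event for the oldest context, release port[0] and shuffle
 * the remaining ports forward so port[0] always holds the oldest active
 * context.
 *
 *	struct execlist_port *port = execlists->port;
 *	unsigned int count;
 *	struct drm_i915_gem_request *rq = port_unpack(port, &count);
 *
 *	if (rq && --count == 0)
 *		execlists_port_complete(execlists, port);
 *	else
 *		port_set(port, port_pack(rq, count));
 */
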
static inline unsigned int
intel_engine_flag(const struct intel_engine_cs *engine)
{
	return BIT(engine->id);
}

static inline u32
intel_read_status_page(struct intel_engine_cs *engine, int reg)
{
	/* Ensure that the compiler doesn't optimize away the load. */
	return READ_ONCE(engine->status_page.page_addr[reg]);
}

static inline void
intel_write_status_page(struct intel_engine_cs *engine, int reg, u32 value)
{
	/* Writing into the status page should be done sparingly. Since
	 * we do so when we are uncertain of the device state, we take a bit
	 * of extra paranoia to try and ensure that the HWS takes the value
	 * we give and that it doesn't end up trapped inside the CPU!
	 */
	if (static_cpu_has(X86_FEATURE_CLFLUSH)) {
		mb();
		clflush(&engine->status_page.page_addr[reg]);
		engine->status_page.page_addr[reg] = value;
		clflush(&engine->status_page.page_addr[reg]);
		mb();
	} else {
		WRITE_ONCE(engine->status_page.page_addr[reg], value);
	}
}

/*
 * Reads a dword out of the status page, which is written to from the command
 * queue by automatic updates, MI_REPORT_HEAD, MI_STORE_DATA_INDEX, or
 * MI_STORE_DATA_IMM.
 *
 * The following dwords have a reserved meaning:
 * 0x00: ISR copy, updated when an ISR bit not set in the HWSTAM changes.
 * 0x04: ring 0 head pointer
 * 0x05: ring 1 head pointer (915-class)
 * 0x06: ring 2 head pointer (915-class)
 * 0x10-0x1b: Context status DWords (GM45)
 * 0x1f: Last written status offset. (GM45)
 * 0x20-0x2f: Reserved (Gen6+)
 *
 * The area from dword 0x30 to 0x3ff is available for driver usage.
 */
#define I915_GEM_HWS_INDEX		0x30
#define I915_GEM_HWS_INDEX_ADDR (I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT)
#define I915_GEM_HWS_PREEMPT_INDEX	0x32
#define I915_GEM_HWS_PREEMPT_ADDR (I915_GEM_HWS_PREEMPT_INDEX << MI_STORE_DWORD_INDEX_SHIFT)
#define I915_GEM_HWS_SCRATCH_INDEX	0x40
#define I915_GEM_HWS_SCRATCH_ADDR (I915_GEM_HWS_SCRATCH_INDEX << MI_STORE_DWORD_INDEX_SHIFT)

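/*
 * Note (editorial sketch): the *_INDEX values are dword indices used for CPU
 * access via intel_read_status_page()/intel_write_status_page(), while the
 * *_ADDR variants are the shifted offsets expected by commands such as
 * MI_STORE_DWORD_INDEX that target the same slot, e.g.
 *
 *	seqno = intel_read_status_page(engine, I915_GEM_HWS_INDEX);
 */
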
#define I915_HWS_CSB_BUF0_INDEX		0x10
#define I915_HWS_CSB_WRITE_INDEX	0x1f
#define CNL_HWS_CSB_WRITE_INDEX		0x2f

struct intel_ring *
intel_engine_create_ring(struct intel_engine_cs *engine, int size);
int intel_ring_pin(struct intel_ring *ring,
		   struct drm_i915_private *i915,
		   unsigned int offset_bias);
void intel_ring_reset(struct intel_ring *ring, u32 tail);
unsigned int intel_ring_update_space(struct intel_ring *ring);
void intel_ring_unpin(struct intel_ring *ring);
void intel_ring_free(struct intel_ring *ring);

void intel_engine_stop(struct intel_engine_cs *engine);
void intel_engine_cleanup(struct intel_engine_cs *engine);

void intel_legacy_submission_resume(struct drm_i915_private *dev_priv);

int __must_check intel_ring_cacheline_align(struct drm_i915_gem_request *req);

int intel_ring_wait_for_space(struct intel_ring *ring, unsigned int bytes);
u32 __must_check *intel_ring_begin(struct drm_i915_gem_request *req,
				   unsigned int n);

static inline void
intel_ring_advance(struct drm_i915_gem_request *req, u32 *cs)
{
	/* Dummy function.
	 *
	 * This serves as a placeholder in the code so that the reader
	 * can compare against the preceding intel_ring_begin() and
	 * check that the number of dwords emitted matches the space
	 * reserved for the command packet (i.e. the value passed to
	 * intel_ring_begin()).
	 */
	GEM_BUG_ON((req->ring->vaddr + req->ring->emit) != cs);
}

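/*
 * Typical emission pattern (illustrative sketch only; error handling and the
 * actual commands depend on the caller):
 *
 *	u32 *cs = intel_ring_begin(req, 4);	// reserve 4 dwords
 *	if (IS_ERR(cs))
 *		return PTR_ERR(cs);
 *
 *	*cs++ = MI_STORE_DWORD_INDEX;
 *	*cs++ = I915_GEM_HWS_SCRATCH_ADDR;
 *	*cs++ = 0;
 *	*cs++ = MI_NOOP;
 *
 *	intel_ring_advance(req, cs);		// must match the 4 reserved
 */
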
static inline u32
intel_ring_wrap(const struct intel_ring *ring, u32 pos)
{
	return pos & (ring->size - 1);
}

static inline u32
intel_ring_offset(const struct drm_i915_gem_request *req, void *addr)
{
	/* Don't write ring->size (equivalent to 0) as that hangs some GPUs. */
	u32 offset = addr - req->ring->vaddr;
	GEM_BUG_ON(offset > req->ring->size);
	return intel_ring_wrap(req->ring, offset);
}

static inline void
assert_ring_tail_valid(const struct intel_ring *ring, unsigned int tail)
{
	/* We could combine these into a single tail operation, but keeping
	 * them as separate tests will help identify the cause should one
	 * ever fire.
	 */
	GEM_BUG_ON(!IS_ALIGNED(tail, 8));
	GEM_BUG_ON(tail >= ring->size);

	/*
	 * "Ring Buffer Use"
	 *	Gen2 BSpec "1. Programming Environment" / 1.4.4.6
	 *	Gen3 BSpec "1c Memory Interface Functions" / 2.3.4.5
	 *	Gen4+ BSpec "1c Memory Interface and Command Stream" / 5.3.4.5
	 * "If the Ring Buffer Head Pointer and the Tail Pointer are on the
	 * same cacheline, the Head Pointer must not be greater than the Tail
	 * Pointer."
	 *
	 * We use ring->head as the last known location of the actual RING_HEAD,
	 * it may have advanced but in the worst case it is equal to ring->head
	 * and so we should never program RING_TAIL to advance into the same
	 * cacheline as ring->head.
	 */
#define cacheline(a) round_down(a, CACHELINE_BYTES)
	GEM_BUG_ON(cacheline(tail) == cacheline(ring->head) &&
		   tail < ring->head);
#undef cacheline
}

static inline unsigned int
intel_ring_set_tail(struct intel_ring *ring, unsigned int tail)
{
	/* Whilst writes to the tail are strictly ordered, there is no
	 * serialisation between readers and the writers. The tail may be
	 * read by i915_gem_request_retire() just as it is being updated
	 * by execlists, as although the breadcrumb is complete, the context
	 * switch hasn't been seen.
	 */
	assert_ring_tail_valid(ring, tail);
	ring->tail = tail;
	return tail;
}

void intel_engine_init_global_seqno(struct intel_engine_cs *engine, u32 seqno);

void intel_engine_setup_common(struct intel_engine_cs *engine);
int intel_engine_init_common(struct intel_engine_cs *engine);
int intel_engine_create_scratch(struct intel_engine_cs *engine, int size);
void intel_engine_cleanup_common(struct intel_engine_cs *engine);

int intel_init_render_ring_buffer(struct intel_engine_cs *engine);
int intel_init_bsd_ring_buffer(struct intel_engine_cs *engine);
int intel_init_blt_ring_buffer(struct intel_engine_cs *engine);
int intel_init_vebox_ring_buffer(struct intel_engine_cs *engine);

u64 intel_engine_get_active_head(struct intel_engine_cs *engine);
u64 intel_engine_get_last_batch_head(struct intel_engine_cs *engine);

static inline u32 intel_engine_get_seqno(struct intel_engine_cs *engine)
{
	return intel_read_status_page(engine, I915_GEM_HWS_INDEX);
}

static inline u32 intel_engine_last_submit(struct intel_engine_cs *engine)
{
	/* We are only peeking at the tail of the submit queue (and not the
	 * queue itself) in order to gain a hint as to the current active
	 * state of the engine. Callers are not expected to be taking
	 * engine->timeline->lock, nor are they expected to be concerned
	 * with serialising this hint with anything, so document it as
	 * a hint and nothing more.
	 */
	return READ_ONCE(engine->timeline->seqno);
}

int init_workarounds_ring(struct intel_engine_cs *engine);
int intel_ring_workarounds_emit(struct drm_i915_gem_request *req);

void intel_engine_get_instdone(struct intel_engine_cs *engine,
			       struct intel_instdone *instdone);

/*
 * Arbitrary size for largest possible 'add request' sequence. The code paths
 * are complex and variable. Empirical measurement shows that the worst case
 * is BDW at 192 bytes (6 + 6 + 36 dwords), then ILK at 136 bytes. However,
 * we need to allocate double the largest single packet within that emission
 * to account for tail wraparound (so 6 + 6 + 72 dwords for BDW).
 */
#define MIN_SPACE_FOR_ADD_REQUEST 336

static inline u32 intel_hws_seqno_address(struct intel_engine_cs *engine)
{
	return engine->status_page.ggtt_offset + I915_GEM_HWS_INDEX_ADDR;
}

static inline u32 intel_hws_preempt_done_address(struct intel_engine_cs *engine)
{
	return engine->status_page.ggtt_offset + I915_GEM_HWS_PREEMPT_ADDR;
}

/* intel_breadcrumbs.c -- user interrupt bottom-half for waiters */
int intel_engine_init_breadcrumbs(struct intel_engine_cs *engine);

static inline void intel_wait_init(struct intel_wait *wait,
				   struct drm_i915_gem_request *rq)
{
	wait->tsk = current;
	wait->request = rq;
}

static inline void intel_wait_init_for_seqno(struct intel_wait *wait, u32 seqno)
{
	wait->tsk = current;
	wait->seqno = seqno;
}

static inline bool intel_wait_has_seqno(const struct intel_wait *wait)
{
	return wait->seqno;
}

static inline bool
intel_wait_update_seqno(struct intel_wait *wait, u32 seqno)
{
	wait->seqno = seqno;
	return intel_wait_has_seqno(wait);
}

static inline bool
intel_wait_update_request(struct intel_wait *wait,
			  const struct drm_i915_gem_request *rq)
{
	return intel_wait_update_seqno(wait, i915_gem_request_global_seqno(rq));
}

static inline bool
intel_wait_check_seqno(const struct intel_wait *wait, u32 seqno)
{
	return wait->seqno == seqno;
}

static inline bool
intel_wait_check_request(const struct intel_wait *wait,
			 const struct drm_i915_gem_request *rq)
{
	return intel_wait_check_seqno(wait, i915_gem_request_global_seqno(rq));
}

static inline bool intel_wait_complete(const struct intel_wait *wait)
{
	return RB_EMPTY_NODE(&wait->node);
}

bool intel_engine_add_wait(struct intel_engine_cs *engine,
			   struct intel_wait *wait);
void intel_engine_remove_wait(struct intel_engine_cs *engine,
			      struct intel_wait *wait);
void intel_engine_enable_signaling(struct drm_i915_gem_request *request,
				   bool wakeup);
void intel_engine_cancel_signaling(struct drm_i915_gem_request *request);

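/*
 * Illustrative waiter flow (editorial sketch, not the canonical wait loop in
 * the request code; task-state handling, timeouts and error paths are
 * omitted):
 *
 *	struct intel_wait wait;
 *
 *	intel_wait_init(&wait, rq);
 *	intel_engine_add_wait(engine, &wait);	// true if we became the b/h
 *	while (!i915_gem_request_completed(rq))
 *		io_schedule();			// woken by the interrupt b/h
 *	intel_engine_remove_wait(engine, &wait);
 */
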
static inline bool intel_engine_has_waiter(const struct intel_engine_cs *engine)
{
	return READ_ONCE(engine->breadcrumbs.irq_wait);
}

unsigned int intel_engine_wakeup(struct intel_engine_cs *engine);
#define ENGINE_WAKEUP_WAITER BIT(0)
#define ENGINE_WAKEUP_ASLEEP BIT(1)

void intel_engine_pin_breadcrumbs_irq(struct intel_engine_cs *engine);
void intel_engine_unpin_breadcrumbs_irq(struct intel_engine_cs *engine);

void __intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine);
void intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine);

void intel_engine_reset_breadcrumbs(struct intel_engine_cs *engine);
void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine);
bool intel_breadcrumbs_busy(struct intel_engine_cs *engine);

static inline u32 *gen8_emit_pipe_control(u32 *batch, u32 flags, u32 offset)
{
	memset(batch, 0, 6 * sizeof(u32));

	batch[0] = GFX_OP_PIPE_CONTROL(6);
	batch[1] = flags;
	batch[2] = offset;

	return batch + 6;
}

static inline u32 *
gen8_emit_ggtt_write_rcs(u32 *cs, u32 value, u32 gtt_offset)
{
	/* We're using qword write, offset should be aligned to 8 bytes. */
	GEM_BUG_ON(!IS_ALIGNED(gtt_offset, 8));

	/* w/a: post-sync ops following a GPGPU operation need a prior
	 * CS_STALL, which is emitted by the flush following the batch.
	 */
	*cs++ = GFX_OP_PIPE_CONTROL(6);
	*cs++ = PIPE_CONTROL_GLOBAL_GTT_IVB | PIPE_CONTROL_CS_STALL |
		PIPE_CONTROL_QW_WRITE;
	*cs++ = gtt_offset;
	*cs++ = 0;
	*cs++ = value;
	/* We're thrashing one dword of HWS. */
	*cs++ = 0;

	return cs;
}

static inline u32 *
gen8_emit_ggtt_write(u32 *cs, u32 value, u32 gtt_offset)
{
	/* w/a: bit 5 needs to be zero for MI_FLUSH_DW address. */
	GEM_BUG_ON(gtt_offset & (1 << 5));
	/* Offset should be aligned to 8 bytes for both (QW/DW) write types */
	GEM_BUG_ON(!IS_ALIGNED(gtt_offset, 8));

	*cs++ = (MI_FLUSH_DW + 1) | MI_FLUSH_DW_OP_STOREDW;
	*cs++ = gtt_offset | MI_FLUSH_DW_USE_GTT;
	*cs++ = 0;
	*cs++ = value;

	return cs;
}

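/*
 * Illustrative use of the helpers above (editorial sketch): a breadcrumb that
 * writes the request's global seqno into the HWS via the GGTT, roughly as a
 * gen8+ emit_breadcrumb implementation might do.
 *
 *	cs = gen8_emit_ggtt_write(cs, request->global_seqno,
 *				  intel_hws_seqno_address(request->engine));
 *	*cs++ = MI_USER_INTERRUPT;
 */
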
bool intel_engine_is_idle(struct intel_engine_cs *engine);
bool intel_engines_are_idle(struct drm_i915_private *dev_priv);

bool intel_engine_has_kernel_context(const struct intel_engine_cs *engine);

void intel_engines_park(struct drm_i915_private *i915);
void intel_engines_unpark(struct drm_i915_private *i915);

void intel_engines_reset_default_submission(struct drm_i915_private *i915);
unsigned int intel_engines_has_context_isolation(struct drm_i915_private *i915);

bool intel_engine_can_store_dword(struct intel_engine_cs *engine);

__printf(3, 4)
void intel_engine_dump(struct intel_engine_cs *engine,
		       struct drm_printer *m,
		       const char *header, ...);

struct intel_engine_cs *
intel_engine_lookup_user(struct drm_i915_private *i915, u8 class, u8 instance);

static inline void intel_engine_context_in(struct intel_engine_cs *engine)
{
	unsigned long flags;

	if (READ_ONCE(engine->stats.enabled) == 0)
		return;

	spin_lock_irqsave(&engine->stats.lock, flags);

	if (engine->stats.enabled > 0) {
		if (engine->stats.active++ == 0)
			engine->stats.start = ktime_get();
		GEM_BUG_ON(engine->stats.active == 0);
	}

	spin_unlock_irqrestore(&engine->stats.lock, flags);
}

static inline void intel_engine_context_out(struct intel_engine_cs *engine)
{
	unsigned long flags;

	if (READ_ONCE(engine->stats.enabled) == 0)
		return;

	spin_lock_irqsave(&engine->stats.lock, flags);

	if (engine->stats.enabled > 0) {
		ktime_t last;

		if (engine->stats.active && --engine->stats.active == 0) {
			/*
			 * Decrement the active context count and, in case the
			 * GPU is now idle, add the elapsed time to the running
			 * total.
			 */
			last = ktime_sub(ktime_get(), engine->stats.start);

			engine->stats.total = ktime_add(engine->stats.total,
							last);
		} else if (engine->stats.active == 0) {
			/*
			 * After turning on engine stats, context out might be
			 * the first event in which case we account from the
			 * time stats gathering was turned on.
			 */
			last = ktime_sub(ktime_get(), engine->stats.enabled_at);

			engine->stats.total = ktime_add(engine->stats.total,
							last);
		}
	}

	spin_unlock_irqrestore(&engine->stats.lock, flags);
}

int intel_enable_engine_stats(struct intel_engine_cs *engine);
void intel_disable_engine_stats(struct intel_engine_cs *engine);

ktime_t intel_engine_get_busy_time(struct intel_engine_cs *engine);

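/*
 * Illustrative busy-time sampling (editorial sketch, e.g. for a PMU-style
 * consumer; the delay interval here is arbitrary):
 *
 *	ktime_t t0, t1;
 *
 *	if (intel_enable_engine_stats(engine) == 0) {
 *		t0 = intel_engine_get_busy_time(engine);
 *		udelay(1000);
 *		t1 = intel_engine_get_busy_time(engine);
 *		// busyness over the interval ~= (t1 - t0) / 1000us
 *		intel_disable_engine_stats(engine);
 *	}
 */
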
#endif /* _INTEL_RINGBUFFER_H_ */