drivers/gpu/drm/i915/gt/intel_engine_cs.c
/*
 * Copyright © 2016 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 */

#include <drm/drm_print.h>

#include "gem/i915_gem_context.h"

#include "i915_drv.h"

#include "intel_breadcrumbs.h"
#include "intel_context.h"
#include "intel_engine.h"
#include "intel_engine_pm.h"
#include "intel_engine_user.h"
#include "intel_execlists_submission.h"
#include "intel_gt.h"
#include "intel_gt_requests.h"
#include "intel_gt_pm.h"
#include "intel_lrc_reg.h"
#include "intel_reset.h"
#include "intel_ring.h"
#include "uc/intel_guc_submission.h"

/* Haswell does have the CXT_SIZE register however it does not appear to be
 * valid. Now, docs explain in dwords what is in the context object. The full
 * size is 70720 bytes, however, the power context and execlist context will
 * never be saved (power context is stored elsewhere, and execlists don't work
 * on HSW) - so the final size, including the extra state required for the
 * Resource Streamer, is 66944 bytes, which rounds to 17 pages.
 */
#define HSW_CXT_TOTAL_SIZE		(17 * PAGE_SIZE)

#define DEFAULT_LR_CONTEXT_RENDER_SIZE	(22 * PAGE_SIZE)
#define GEN8_LR_CONTEXT_RENDER_SIZE	(20 * PAGE_SIZE)
#define GEN9_LR_CONTEXT_RENDER_SIZE	(22 * PAGE_SIZE)
#define GEN10_LR_CONTEXT_RENDER_SIZE	(18 * PAGE_SIZE)
#define GEN11_LR_CONTEXT_RENDER_SIZE	(14 * PAGE_SIZE)

#define GEN8_LR_CONTEXT_OTHER_SIZE	( 2 * PAGE_SIZE)

#define MAX_MMIO_BASES 3
struct engine_info {
	unsigned int hw_id;
	u8 class;
	u8 instance;
	/* mmio bases table *must* be sorted in reverse gen order */
	struct engine_mmio_base {
		u32 gen : 8;
		u32 base : 24;
	} mmio_bases[MAX_MMIO_BASES];
};

static const struct engine_info intel_engines[] = {
	[RCS0] = {
		.hw_id = RCS0_HW,
		.class = RENDER_CLASS,
		.instance = 0,
		.mmio_bases = {
			{ .gen = 1, .base = RENDER_RING_BASE }
		},
	},
	[BCS0] = {
		.hw_id = BCS0_HW,
		.class = COPY_ENGINE_CLASS,
		.instance = 0,
		.mmio_bases = {
			{ .gen = 6, .base = BLT_RING_BASE }
		},
	},
	[VCS0] = {
		.hw_id = VCS0_HW,
		.class = VIDEO_DECODE_CLASS,
		.instance = 0,
		.mmio_bases = {
			{ .gen = 11, .base = GEN11_BSD_RING_BASE },
			{ .gen = 6, .base = GEN6_BSD_RING_BASE },
			{ .gen = 4, .base = BSD_RING_BASE }
		},
	},
	[VCS1] = {
		.hw_id = VCS1_HW,
		.class = VIDEO_DECODE_CLASS,
		.instance = 1,
		.mmio_bases = {
			{ .gen = 11, .base = GEN11_BSD2_RING_BASE },
			{ .gen = 8, .base = GEN8_BSD2_RING_BASE }
		},
	},
	[VCS2] = {
		.hw_id = VCS2_HW,
		.class = VIDEO_DECODE_CLASS,
		.instance = 2,
		.mmio_bases = {
			{ .gen = 11, .base = GEN11_BSD3_RING_BASE }
		},
	},
	[VCS3] = {
		.hw_id = VCS3_HW,
		.class = VIDEO_DECODE_CLASS,
		.instance = 3,
		.mmio_bases = {
			{ .gen = 11, .base = GEN11_BSD4_RING_BASE }
		},
	},
	[VECS0] = {
		.hw_id = VECS0_HW,
		.class = VIDEO_ENHANCEMENT_CLASS,
		.instance = 0,
		.mmio_bases = {
			{ .gen = 11, .base = GEN11_VEBOX_RING_BASE },
			{ .gen = 7, .base = VEBOX_RING_BASE }
		},
	},
	[VECS1] = {
		.hw_id = VECS1_HW,
		.class = VIDEO_ENHANCEMENT_CLASS,
		.instance = 1,
		.mmio_bases = {
			{ .gen = 11, .base = GEN11_VEBOX2_RING_BASE }
		},
	},
};

/**
 * intel_engine_context_size() - return the size of the context for an engine
 * @gt: the gt
 * @class: engine class
 *
 * Each engine class may require a different amount of space for a context
 * image.
 *
 * Return: size (in bytes) of an engine class specific context image
 *
 * Note: this size includes the HWSP, which is part of the context image
 * in LRC mode, but does not include the "shared data page" used with
 * GuC submission. The caller should account for this if using the GuC.
 */
u32 intel_engine_context_size(struct intel_gt *gt, u8 class)
{
	struct intel_uncore *uncore = gt->uncore;
	u32 cxt_size;

	BUILD_BUG_ON(I915_GTT_PAGE_SIZE != PAGE_SIZE);

	switch (class) {
	case RENDER_CLASS:
		switch (INTEL_GEN(gt->i915)) {
		default:
			MISSING_CASE(INTEL_GEN(gt->i915));
			return DEFAULT_LR_CONTEXT_RENDER_SIZE;
		case 12:
		case 11:
			return GEN11_LR_CONTEXT_RENDER_SIZE;
		case 10:
			return GEN10_LR_CONTEXT_RENDER_SIZE;
		case 9:
			return GEN9_LR_CONTEXT_RENDER_SIZE;
		case 8:
			return GEN8_LR_CONTEXT_RENDER_SIZE;
		case 7:
			if (IS_HASWELL(gt->i915))
				return HSW_CXT_TOTAL_SIZE;

			cxt_size = intel_uncore_read(uncore, GEN7_CXT_SIZE);
			return round_up(GEN7_CXT_TOTAL_SIZE(cxt_size) * 64,
					PAGE_SIZE);
		case 6:
			cxt_size = intel_uncore_read(uncore, CXT_SIZE);
			return round_up(GEN6_CXT_TOTAL_SIZE(cxt_size) * 64,
					PAGE_SIZE);
		case 5:
		case 4:
			/*
			 * There is a discrepancy here between the size reported
			 * by the register and the size of the context layout
			 * in the docs. Both are described as authoritative!
			 *
			 * The discrepancy is on the order of a few cachelines,
			 * but the total is under one page (4k), which is our
			 * minimum allocation anyway so it should all come
			 * out in the wash.
			 */
			cxt_size = intel_uncore_read(uncore, CXT_SIZE) + 1;
			drm_dbg(&gt->i915->drm,
				"gen%d CXT_SIZE = %d bytes [0x%08x]\n",
				INTEL_GEN(gt->i915), cxt_size * 64,
				cxt_size - 1);
			return round_up(cxt_size * 64, PAGE_SIZE);
		case 3:
		case 2:
		/* For the special day when i810 gets merged. */
		case 1:
			return 0;
		}
		break;
	default:
		MISSING_CASE(class);
		fallthrough;
	case VIDEO_DECODE_CLASS:
	case VIDEO_ENHANCEMENT_CLASS:
	case COPY_ENGINE_CLASS:
		if (INTEL_GEN(gt->i915) < 8)
			return 0;
		return GEN8_LR_CONTEXT_OTHER_SIZE;
	}
}

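/*
 * Each mmio_bases[] table above is sorted by gen in descending order, so the
 * lookup below picks the first (i.e. newest) entry whose ->gen does not
 * exceed the device generation: e.g. the VCS0 table resolves to
 * GEN11_BSD_RING_BASE on gen11+, GEN6_BSD_RING_BASE on gen6-gen10 and
 * BSD_RING_BASE before that.
 */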
static u32 __engine_mmio_base(struct drm_i915_private *i915,
			      const struct engine_mmio_base *bases)
{
	int i;

	for (i = 0; i < MAX_MMIO_BASES; i++)
		if (INTEL_GEN(i915) >= bases[i].gen)
			break;

	GEM_BUG_ON(i == MAX_MMIO_BASES);
	GEM_BUG_ON(!bases[i].base);

	return bases[i].base;
}

static void __sprint_engine_name(struct intel_engine_cs *engine)
{
	/*
	 * Before we know what the uABI name for this engine will be,
	 * we still would like to keep track of this engine in the debug logs.
	 * We throw in a ' here as a reminder that this isn't its final name.
	 */
	GEM_WARN_ON(snprintf(engine->name, sizeof(engine->name), "%s'%u",
			     intel_engine_class_repr(engine->class),
			     engine->instance) >= sizeof(engine->name));
}

void intel_engine_set_hwsp_writemask(struct intel_engine_cs *engine, u32 mask)
{
	/*
	 * Though they added more rings on g4x/ilk, they did not add
	 * per-engine HWSTAM until gen6.
	 */
	if (INTEL_GEN(engine->i915) < 6 && engine->class != RENDER_CLASS)
		return;

	if (INTEL_GEN(engine->i915) >= 3)
		ENGINE_WRITE(engine, RING_HWSTAM, mask);
	else
		ENGINE_WRITE16(engine, RING_HWSTAM, mask);
}

static void intel_engine_sanitize_mmio(struct intel_engine_cs *engine)
{
	/* Mask off all writes into the unknown HWSP */
	intel_engine_set_hwsp_writemask(engine, ~0u);
}

static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id)
{
	const struct engine_info *info = &intel_engines[id];
	struct drm_i915_private *i915 = gt->i915;
	struct intel_engine_cs *engine;

	BUILD_BUG_ON(MAX_ENGINE_CLASS >= BIT(GEN11_ENGINE_CLASS_WIDTH));
	BUILD_BUG_ON(MAX_ENGINE_INSTANCE >= BIT(GEN11_ENGINE_INSTANCE_WIDTH));

	if (GEM_DEBUG_WARN_ON(id >= ARRAY_SIZE(gt->engine)))
		return -EINVAL;

	if (GEM_DEBUG_WARN_ON(info->class > MAX_ENGINE_CLASS))
		return -EINVAL;

	if (GEM_DEBUG_WARN_ON(info->instance > MAX_ENGINE_INSTANCE))
		return -EINVAL;

	if (GEM_DEBUG_WARN_ON(gt->engine_class[info->class][info->instance]))
		return -EINVAL;

	engine = kzalloc(sizeof(*engine), GFP_KERNEL);
	if (!engine)
		return -ENOMEM;

	BUILD_BUG_ON(BITS_PER_TYPE(engine->mask) < I915_NUM_ENGINES);

	engine->id = id;
	engine->legacy_idx = INVALID_ENGINE;
	engine->mask = BIT(id);
	engine->i915 = i915;
	engine->gt = gt;
	engine->uncore = gt->uncore;
	engine->mmio_base = __engine_mmio_base(i915, info->mmio_bases);
	engine->hw_id = info->hw_id;
	engine->guc_id = MAKE_GUC_ID(info->class, info->instance);

	engine->class = info->class;
	engine->instance = info->instance;
	__sprint_engine_name(engine);

	engine->props.heartbeat_interval_ms =
		CONFIG_DRM_I915_HEARTBEAT_INTERVAL;
	engine->props.max_busywait_duration_ns =
		CONFIG_DRM_I915_MAX_REQUEST_BUSYWAIT;
	engine->props.preempt_timeout_ms =
		CONFIG_DRM_I915_PREEMPT_TIMEOUT;
	engine->props.stop_timeout_ms =
		CONFIG_DRM_I915_STOP_TIMEOUT;
	engine->props.timeslice_duration_ms =
		CONFIG_DRM_I915_TIMESLICE_DURATION;

	/* Override to uninterruptible for OpenCL workloads. */
	if (INTEL_GEN(i915) == 12 && engine->class == RENDER_CLASS)
		engine->props.preempt_timeout_ms = 0;

	engine->defaults = engine->props; /* never to change again */

	engine->context_size = intel_engine_context_size(gt, engine->class);
	if (WARN_ON(engine->context_size > BIT(20)))
		engine->context_size = 0;
	if (engine->context_size)
		DRIVER_CAPS(i915)->has_logical_contexts = true;

	/* Nothing to do here, execute in order of dependencies */
	engine->schedule = NULL;

	ewma__engine_latency_init(&engine->latency);
	seqcount_init(&engine->stats.lock);

	ATOMIC_INIT_NOTIFIER_HEAD(&engine->context_status_notifier);

	/* Scrub mmio state on takeover */
	intel_engine_sanitize_mmio(engine);

	gt->engine_class[info->class][info->instance] = engine;
	gt->engine[id] = engine;

	return 0;
}

static void __setup_engine_capabilities(struct intel_engine_cs *engine)
{
	struct drm_i915_private *i915 = engine->i915;

	if (engine->class == VIDEO_DECODE_CLASS) {
		/*
		 * HEVC support is present on first engine instance
		 * before Gen11 and on all instances afterwards.
		 */
		if (INTEL_GEN(i915) >= 11 ||
		    (INTEL_GEN(i915) >= 9 && engine->instance == 0))
			engine->uabi_capabilities |=
				I915_VIDEO_CLASS_CAPABILITY_HEVC;

		/*
		 * SFC block is present only on even logical engine
		 * instances.
		 */
		if ((INTEL_GEN(i915) >= 11 &&
		     (engine->gt->info.vdbox_sfc_access &
		      BIT(engine->instance))) ||
		    (INTEL_GEN(i915) >= 9 && engine->instance == 0))
			engine->uabi_capabilities |=
				I915_VIDEO_AND_ENHANCE_CLASS_CAPABILITY_SFC;
	} else if (engine->class == VIDEO_ENHANCEMENT_CLASS) {
		if (INTEL_GEN(i915) >= 9)
			engine->uabi_capabilities |=
				I915_VIDEO_AND_ENHANCE_CLASS_CAPABILITY_SFC;
	}
}

static void intel_setup_engine_capabilities(struct intel_gt *gt)
{
	struct intel_engine_cs *engine;
	enum intel_engine_id id;

	for_each_engine(engine, gt, id)
		__setup_engine_capabilities(engine);
}

/**
 * intel_engines_release() - free the resources allocated for Command Streamers
 * @gt: pointer to struct intel_gt
 */
void intel_engines_release(struct intel_gt *gt)
{
	struct intel_engine_cs *engine;
	enum intel_engine_id id;

	/*
	 * Before we release the resources held by engine, we must be certain
	 * that the HW is no longer accessing them -- having the GPU scribble
	 * to or read from a page being used for something else causes no end
	 * of fun.
	 *
	 * The GPU should be reset by this point, but assume the worst just
	 * in case we aborted before completely initialising the engines.
	 */
	GEM_BUG_ON(intel_gt_pm_is_awake(gt));
	if (!INTEL_INFO(gt->i915)->gpu_reset_clobbers_display)
		__intel_gt_reset(gt, ALL_ENGINES);

	/* Decouple the backend; but keep the layout for late GPU resets */
	for_each_engine(engine, gt, id) {
		if (!engine->release)
			continue;

		intel_wakeref_wait_for_idle(&engine->wakeref);
		GEM_BUG_ON(intel_engine_pm_is_awake(engine));

		engine->release(engine);
		engine->release = NULL;

		memset(&engine->reset, 0, sizeof(engine->reset));
	}
}

void intel_engine_free_request_pool(struct intel_engine_cs *engine)
{
	if (!engine->request_pool)
		return;

	kmem_cache_free(i915_request_slab_cache(), engine->request_pool);
}

void intel_engines_free(struct intel_gt *gt)
{
	struct intel_engine_cs *engine;
	enum intel_engine_id id;

	/* Free the requests! dma-resv keeps fences around for an eternity */
	rcu_barrier();

	for_each_engine(engine, gt, id) {
		intel_engine_free_request_pool(engine);
		kfree(engine);
		gt->engine[id] = NULL;
	}
}

/*
 * Determine which engines are fused off in our particular hardware.
 * Note that we have a catch-22 situation where we need to be able to access
 * the blitter forcewake domain to read the engine fuses, but at the same time
 * we need to know which engines are available on the system to know which
 * forcewake domains are present. We solve this by initializing the forcewake
 * domains based on the full engine mask in the platform capabilities before
 * calling this function and pruning the domains for fused-off engines
 * afterwards.
 */
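/*
 * Note that the fuse register counts *disabled* units, so it is inverted
 * below: a set bit in vdbox_mask/vebox_mask then means the unit is present.
 * Any VCS/VECS engine named in the platform mask but missing from the decoded
 * fuse mask is pruned from info->engine_mask (e.g. a part with only VCS0 and
 * VCS2 populated keeps those two engines and drops VCS1/VCS3).
 */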
static intel_engine_mask_t init_engine_mask(struct intel_gt *gt)
{
	struct drm_i915_private *i915 = gt->i915;
	struct intel_gt_info *info = &gt->info;
	struct intel_uncore *uncore = gt->uncore;
	unsigned int logical_vdbox = 0;
	unsigned int i;
	u32 media_fuse;
	u16 vdbox_mask;
	u16 vebox_mask;

	info->engine_mask = INTEL_INFO(i915)->platform_engine_mask;

	if (INTEL_GEN(i915) < 11)
		return info->engine_mask;

	media_fuse = ~intel_uncore_read(uncore, GEN11_GT_VEBOX_VDBOX_DISABLE);

	vdbox_mask = media_fuse & GEN11_GT_VDBOX_DISABLE_MASK;
	vebox_mask = (media_fuse & GEN11_GT_VEBOX_DISABLE_MASK) >>
		      GEN11_GT_VEBOX_DISABLE_SHIFT;

	for (i = 0; i < I915_MAX_VCS; i++) {
		if (!HAS_ENGINE(gt, _VCS(i))) {
			vdbox_mask &= ~BIT(i);
			continue;
		}

		if (!(BIT(i) & vdbox_mask)) {
			info->engine_mask &= ~BIT(_VCS(i));
			drm_dbg(&i915->drm, "vcs%u fused off\n", i);
			continue;
		}

		/*
		 * In Gen11, only even numbered logical VDBOXes are
		 * hooked up to an SFC (Scaler & Format Converter) unit.
		 * In TGL each VDBOX has access to an SFC.
		 */
		if (INTEL_GEN(i915) >= 12 || logical_vdbox++ % 2 == 0)
			gt->info.vdbox_sfc_access |= BIT(i);
	}
	drm_dbg(&i915->drm, "vdbox enable: %04x, instances: %04lx\n",
		vdbox_mask, VDBOX_MASK(gt));
	GEM_BUG_ON(vdbox_mask != VDBOX_MASK(gt));

	for (i = 0; i < I915_MAX_VECS; i++) {
		if (!HAS_ENGINE(gt, _VECS(i))) {
			vebox_mask &= ~BIT(i);
			continue;
		}

		if (!(BIT(i) & vebox_mask)) {
			info->engine_mask &= ~BIT(_VECS(i));
			drm_dbg(&i915->drm, "vecs%u fused off\n", i);
		}
	}
	drm_dbg(&i915->drm, "vebox enable: %04x, instances: %04lx\n",
		vebox_mask, VEBOX_MASK(gt));
	GEM_BUG_ON(vebox_mask != VEBOX_MASK(gt));

	return info->engine_mask;
}

/**
 * intel_engines_init_mmio() - allocate and prepare the Engine Command Streamers
 * @gt: pointer to struct intel_gt
 *
 * Return: non-zero if the initialization failed.
 */
int intel_engines_init_mmio(struct intel_gt *gt)
{
	struct drm_i915_private *i915 = gt->i915;
	const unsigned int engine_mask = init_engine_mask(gt);
	unsigned int mask = 0;
	unsigned int i;
	int err;

	drm_WARN_ON(&i915->drm, engine_mask == 0);
	drm_WARN_ON(&i915->drm, engine_mask &
		    GENMASK(BITS_PER_TYPE(mask) - 1, I915_NUM_ENGINES));

	if (i915_inject_probe_failure(i915))
		return -ENODEV;

	for (i = 0; i < ARRAY_SIZE(intel_engines); i++) {
		if (!HAS_ENGINE(gt, i))
			continue;

		err = intel_engine_setup(gt, i);
		if (err)
			goto cleanup;

		mask |= BIT(i);
	}

	/*
	 * Catch failures to update intel_engines table when the new engines
	 * are added to the driver by a warning and disabling the forgotten
	 * engines.
	 */
	if (drm_WARN_ON(&i915->drm, mask != engine_mask))
		gt->info.engine_mask = mask;

	gt->info.num_engines = hweight32(mask);

	intel_gt_check_and_clear_faults(gt);

	intel_setup_engine_capabilities(gt);

	intel_uncore_prune_engine_fw_domains(gt->uncore, gt);

	return 0;

cleanup:
	intel_engines_free(gt);
	return err;
}

void intel_engine_init_execlists(struct intel_engine_cs *engine)
{
	struct intel_engine_execlists * const execlists = &engine->execlists;

	execlists->port_mask = 1;
	GEM_BUG_ON(!is_power_of_2(execlists_num_ports(execlists)));
	GEM_BUG_ON(execlists_num_ports(execlists) > EXECLIST_MAX_PORTS);

	memset(execlists->pending, 0, sizeof(execlists->pending));
	execlists->active =
		memset(execlists->inflight, 0, sizeof(execlists->inflight));

	execlists->queue_priority_hint = INT_MIN;
	execlists->queue = RB_ROOT_CACHED;
}

static void cleanup_status_page(struct intel_engine_cs *engine)
{
	struct i915_vma *vma;

	/* Prevent writes into HWSP after returning the page to the system */
	intel_engine_set_hwsp_writemask(engine, ~0u);

	vma = fetch_and_zero(&engine->status_page.vma);
	if (!vma)
		return;

	if (!HWS_NEEDS_PHYSICAL(engine->i915))
		i915_vma_unpin(vma);

	i915_gem_object_unpin_map(vma->obj);
	i915_gem_object_put(vma->obj);
}

static int pin_ggtt_status_page(struct intel_engine_cs *engine,
				struct i915_vma *vma)
{
	unsigned int flags;

	if (!HAS_LLC(engine->i915) && i915_ggtt_has_aperture(engine->gt->ggtt))
		/*
		 * On g33, we cannot place HWS above 256MiB, so
		 * restrict its pinning to the low mappable arena.
		 * Though this restriction is not documented for
		 * gen4, gen5, or byt, they also behave similarly
		 * and hang if the HWS is placed at the top of the
		 * GTT. To generalise, it appears that all !llc
		 * platforms have issues with us placing the HWS
		 * above the mappable region (even though we never
		 * actually map it).
		 */
		flags = PIN_MAPPABLE;
	else
		flags = PIN_HIGH;

	return i915_ggtt_pin(vma, NULL, 0, flags);
}

static int init_status_page(struct intel_engine_cs *engine)
{
	struct drm_i915_gem_object *obj;
	struct i915_vma *vma;
	void *vaddr;
	int ret;

	INIT_LIST_HEAD(&engine->status_page.timelines);

	/*
	 * Though the HWS register does support 36bit addresses, historically
	 * we have had hangs and corruption reported due to wild writes if
	 * the HWS is placed above 4G. We only allow objects to be allocated
	 * in GFP_DMA32 for i965, and no earlier physical address users had
	 * access to more than 4G.
	 */
	obj = i915_gem_object_create_internal(engine->i915, PAGE_SIZE);
	if (IS_ERR(obj)) {
		drm_err(&engine->i915->drm,
			"Failed to allocate status page\n");
		return PTR_ERR(obj);
	}

	i915_gem_object_set_cache_coherency(obj, I915_CACHE_LLC);

	vma = i915_vma_instance(obj, &engine->gt->ggtt->vm, NULL);
	if (IS_ERR(vma)) {
		ret = PTR_ERR(vma);
		goto err;
	}

	vaddr = i915_gem_object_pin_map(obj, I915_MAP_WB);
	if (IS_ERR(vaddr)) {
		ret = PTR_ERR(vaddr);
		goto err;
	}

	engine->status_page.addr = memset(vaddr, 0, PAGE_SIZE);
	engine->status_page.vma = vma;

	if (!HWS_NEEDS_PHYSICAL(engine->i915)) {
		ret = pin_ggtt_status_page(engine, vma);
		if (ret)
			goto err_unpin;
	}

	return 0;

err_unpin:
	i915_gem_object_unpin_map(obj);
err:
	i915_gem_object_put(obj);
	return ret;
}

static int engine_setup_common(struct intel_engine_cs *engine)
{
	int err;

	init_llist_head(&engine->barrier_tasks);

	err = init_status_page(engine);
	if (err)
		return err;

	engine->breadcrumbs = intel_breadcrumbs_create(engine);
	if (!engine->breadcrumbs) {
		err = -ENOMEM;
		goto err_status;
	}

	err = intel_engine_init_cmd_parser(engine);
	if (err)
		goto err_cmd_parser;

	intel_engine_init_active(engine, ENGINE_PHYSICAL);
	intel_engine_init_execlists(engine);
	intel_engine_init__pm(engine);
	intel_engine_init_retire(engine);

	/* Use the whole device by default */
	engine->sseu =
		intel_sseu_from_device_info(&engine->gt->info.sseu);

	intel_engine_init_workarounds(engine);
	intel_engine_init_whitelist(engine);
	intel_engine_init_ctx_wa(engine);

	if (INTEL_GEN(engine->i915) >= 12)
		engine->flags |= I915_ENGINE_HAS_RELATIVE_MMIO;

	return 0;

err_cmd_parser:
	intel_breadcrumbs_free(engine->breadcrumbs);
err_status:
	cleanup_status_page(engine);
	return err;
}

struct measure_breadcrumb {
	struct i915_request rq;
	struct intel_ring ring;
	u32 cs[2048];
};

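/*
 * measure_breadcrumb_dw() emits a fini breadcrumb into the throwaway frame
 * above (a mock request plus a scratch ring) purely to count how many dwords
 * engine->emit_fini_breadcrumb() writes; the result is stashed in
 * engine->emit_fini_breadcrumb_dw so request construction knows how much
 * ring space to reserve for the breadcrumb.
 */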
static int measure_breadcrumb_dw(struct intel_context *ce)
{
	struct intel_engine_cs *engine = ce->engine;
	struct measure_breadcrumb *frame;
	int dw;

	GEM_BUG_ON(!engine->gt->scratch);

	frame = kzalloc(sizeof(*frame), GFP_KERNEL);
	if (!frame)
		return -ENOMEM;

	frame->rq.engine = engine;
	frame->rq.context = ce;
	rcu_assign_pointer(frame->rq.timeline, ce->timeline);

	frame->ring.vaddr = frame->cs;
	frame->ring.size = sizeof(frame->cs);
	frame->ring.wrap =
		BITS_PER_TYPE(frame->ring.size) - ilog2(frame->ring.size);
	frame->ring.effective_size = frame->ring.size;
	intel_ring_update_space(&frame->ring);
	frame->rq.ring = &frame->ring;

	mutex_lock(&ce->timeline->mutex);
	spin_lock_irq(&engine->active.lock);

	dw = engine->emit_fini_breadcrumb(&frame->rq, frame->cs) - frame->cs;

	spin_unlock_irq(&engine->active.lock);
	mutex_unlock(&ce->timeline->mutex);

	GEM_BUG_ON(dw & 1); /* RING_TAIL must be qword aligned */

	kfree(frame);
	return dw;
}

void
intel_engine_init_active(struct intel_engine_cs *engine, unsigned int subclass)
{
	INIT_LIST_HEAD(&engine->active.requests);
	INIT_LIST_HEAD(&engine->active.hold);

	spin_lock_init(&engine->active.lock);
	lockdep_set_subclass(&engine->active.lock, subclass);

	/*
	 * Due to an interesting quirk in lockdep's internal debug tracking,
	 * after setting a subclass we must ensure the lock is used. Otherwise,
	 * nr_unused_locks is incremented once too often.
	 */
#ifdef CONFIG_DEBUG_LOCK_ALLOC
	local_irq_disable();
	lock_map_acquire(&engine->active.lock.dep_map);
	lock_map_release(&engine->active.lock.dep_map);
	local_irq_enable();
#endif
}

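/*
 * Create a context owned by the kernel on @engine and perma-pin it so that it
 * is always available, recording the requested offset within the status page
 * for its timeline. The pinned timeline gets its own lockdep class so the
 * driver can legally take it while user timelines are already held.
 */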
static struct intel_context *
create_pinned_context(struct intel_engine_cs *engine,
		      unsigned int hwsp,
		      struct lock_class_key *key,
		      const char *name)
{
	struct intel_context *ce;
	int err;

	ce = intel_context_create(engine);
	if (IS_ERR(ce))
		return ce;

	__set_bit(CONTEXT_BARRIER_BIT, &ce->flags);
	ce->timeline = page_pack_bits(NULL, hwsp);

	err = intel_context_pin(ce); /* perma-pin so it is always available */
	if (err) {
		intel_context_put(ce);
		return ERR_PTR(err);
	}

	/*
	 * Give our perma-pinned kernel timelines a separate lockdep class,
	 * so that we can use them from within the normal user timelines
	 * should we need to inject GPU operations during their request
	 * construction.
	 */
	lockdep_set_class_and_name(&ce->timeline->mutex, key, name);

	return ce;
}

static void destroy_pinned_context(struct intel_context *ce)
{
	struct intel_engine_cs *engine = ce->engine;
	struct i915_vma *hwsp = engine->status_page.vma;

	GEM_BUG_ON(ce->timeline->hwsp_ggtt != hwsp);

	mutex_lock(&hwsp->vm->mutex);
	list_del(&ce->timeline->engine_link);
	mutex_unlock(&hwsp->vm->mutex);

	intel_context_unpin(ce);
	intel_context_put(ce);
}

static struct intel_context *
create_kernel_context(struct intel_engine_cs *engine)
{
	static struct lock_class_key kernel;

	return create_pinned_context(engine, I915_GEM_HWS_SEQNO_ADDR,
				     &kernel, "kernel_context");
}

867
019bf277
TU
868/**
869 * intel_engines_init_common - initialize cengine state which might require hw access
870 * @engine: Engine to initialize.
871 *
872 * Initializes @engine@ structure members shared between legacy and execlists
873 * submission modes which do require hardware access.
874 *
875 * Typcally done at later stages of submission mode specific engine setup.
876 *
877 * Returns zero on success or an error code on failure.
878 */
7d70a123 879static int engine_init_common(struct intel_engine_cs *engine)
019bf277 880{
38775829 881 struct intel_context *ce;
019bf277
TU
882 int ret;
883
09975b86
CW
884 engine->set_default_submission(engine);
885
38775829
CW
886 /*
887 * We may need to do things with the shrinker which
e8a9c58f
CW
888 * require us to immediately switch back to the default
889 * context. This can cause a problem as pinning the
890 * default context also requires GTT space which may not
891 * be available. To avoid this we always pin the default
892 * context.
893 */
38775829
CW
894 ce = create_kernel_context(engine);
895 if (IS_ERR(ce))
896 return PTR_ERR(ce);
897
fb5970da
CW
898 ret = measure_breadcrumb_dw(ce);
899 if (ret < 0)
900 goto err_context;
901
902 engine->emit_fini_breadcrumb_dw = ret;
38775829 903 engine->kernel_context = ce;
019bf277 904
9dbfea98 905 return 0;
fb5970da
CW
906
907err_context:
908 intel_context_put(ce);
909 return ret;
019bf277 910}
96a945aa 911
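/*
 * intel_engines_init() picks the submission backend for this GT (GuC,
 * execlists or the legacy ring buffer) and then brings up each engine in
 * three phases: common setup, backend-specific setup, and the common init
 * that runs once the backend has been installed. Each engine is finally
 * registered with the uABI engine list via intel_engine_add_user().
 */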
int intel_engines_init(struct intel_gt *gt)
{
	int (*setup)(struct intel_engine_cs *engine);
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	int err;

	if (intel_uc_uses_guc_submission(&gt->uc))
		setup = intel_guc_submission_setup;
	else if (HAS_EXECLISTS(gt->i915))
		setup = intel_execlists_submission_setup;
	else
		setup = intel_ring_submission_setup;

	for_each_engine(engine, gt, id) {
		err = engine_setup_common(engine);
		if (err)
			return err;

		err = setup(engine);
		if (err)
			return err;

		err = engine_init_common(engine);
		if (err)
			return err;

		intel_engine_add_user(engine);
	}

	return 0;
}

/**
 * intel_engine_cleanup_common - cleans up the engine state created by
 * the common initializers.
 * @engine: Engine to cleanup.
 *
 * This cleans up everything created by the common helpers.
 */
void intel_engine_cleanup_common(struct intel_engine_cs *engine)
{
	GEM_BUG_ON(!list_empty(&engine->active.requests));
	tasklet_kill(&engine->execlists.tasklet); /* flush the callback */

	intel_breadcrumbs_free(engine->breadcrumbs);

	intel_engine_fini_retire(engine);
	intel_engine_cleanup_cmd_parser(engine);

	if (engine->default_state)
		fput(engine->default_state);

	if (engine->kernel_context)
		destroy_pinned_context(engine->kernel_context);

	GEM_BUG_ON(!llist_empty(&engine->barrier_tasks));
	cleanup_status_page(engine);

	intel_wa_list_free(&engine->ctx_wa_list);
	intel_wa_list_free(&engine->wa_list);
	intel_wa_list_free(&engine->whitelist);
}

/**
 * intel_engine_resume - re-initializes the HW state of the engine
 * @engine: Engine to resume.
 *
 * Returns zero on success or an error code on failure.
 */
int intel_engine_resume(struct intel_engine_cs *engine)
{
	intel_engine_apply_workarounds(engine);
	intel_engine_apply_whitelist(engine);

	return engine->resume(engine);
}

u64 intel_engine_get_active_head(const struct intel_engine_cs *engine)
{
	struct drm_i915_private *i915 = engine->i915;

	u64 acthd;

	if (INTEL_GEN(i915) >= 8)
		acthd = ENGINE_READ64(engine, RING_ACTHD, RING_ACTHD_UDW);
	else if (INTEL_GEN(i915) >= 4)
		acthd = ENGINE_READ(engine, RING_ACTHD);
	else
		acthd = ENGINE_READ(engine, ACTHD);

	return acthd;
}

u64 intel_engine_get_last_batch_head(const struct intel_engine_cs *engine)
{
	u64 bbaddr;

	if (INTEL_GEN(engine->i915) >= 8)
		bbaddr = ENGINE_READ64(engine, RING_BBADDR, RING_BBADDR_UDW);
	else
		bbaddr = ENGINE_READ(engine, RING_BBADDR);

	return bbaddr;
}

static unsigned long stop_timeout(const struct intel_engine_cs *engine)
{
	if (in_atomic() || irqs_disabled()) /* inside atomic preempt-reset? */
		return 0;

	/*
	 * If we are doing a normal GPU reset, we can take our time and allow
	 * the engine to quiesce. We've stopped submission to the engine, and
	 * if we wait long enough an innocent context should complete and
	 * leave the engine idle. So they should not be caught unaware by
	 * the forthcoming GPU reset (which usually follows the stop_cs)!
	 */
	return READ_ONCE(engine->props.stop_timeout_ms);
}

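/*
 * Request the command streamer to stop by setting STOP_RING in RING_MI_MODE,
 * then poll for MODE_IDLE: first busy-waiting for up to fast_timeout_us and
 * then sleeping for up to slow_timeout_ms. The final posting read gives any
 * in-flight GPU writes a chance to land in memory before the caller proceeds.
 */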
static int __intel_engine_stop_cs(struct intel_engine_cs *engine,
				  int fast_timeout_us,
				  int slow_timeout_ms)
{
	struct intel_uncore *uncore = engine->uncore;
	const i915_reg_t mode = RING_MI_MODE(engine->mmio_base);
	int err;

	intel_uncore_write_fw(uncore, mode, _MASKED_BIT_ENABLE(STOP_RING));
	err = __intel_wait_for_register_fw(engine->uncore, mode,
					   MODE_IDLE, MODE_IDLE,
					   fast_timeout_us,
					   slow_timeout_ms,
					   NULL);

	/* A final mmio read to let GPU writes be hopefully flushed to memory */
	intel_uncore_posting_read_fw(uncore, mode);
	return err;
}

int intel_engine_stop_cs(struct intel_engine_cs *engine)
{
	int err = 0;

	if (INTEL_GEN(engine->i915) < 3)
		return -ENODEV;

	ENGINE_TRACE(engine, "\n");
	if (__intel_engine_stop_cs(engine, 1000, stop_timeout(engine))) {
		ENGINE_TRACE(engine,
			     "timed out on STOP_RING -> IDLE; HEAD:%04x, TAIL:%04x\n",
			     ENGINE_READ_FW(engine, RING_HEAD) & HEAD_ADDR,
			     ENGINE_READ_FW(engine, RING_TAIL) & TAIL_ADDR);

		/*
		 * Sometimes we observe that the idle flag is not
		 * set even though the ring is empty. So double
		 * check before giving up.
		 */
		if ((ENGINE_READ_FW(engine, RING_HEAD) & HEAD_ADDR) !=
		    (ENGINE_READ_FW(engine, RING_TAIL) & TAIL_ADDR))
			err = -ETIMEDOUT;
	}

	return err;
}

void intel_engine_cancel_stop_cs(struct intel_engine_cs *engine)
{
	ENGINE_TRACE(engine, "\n");

	ENGINE_WRITE_FW(engine, RING_MI_MODE, _MASKED_BIT_DISABLE(STOP_RING));
}

const char *i915_cache_level_str(struct drm_i915_private *i915, int type)
{
	switch (type) {
	case I915_CACHE_NONE: return " uncached";
	case I915_CACHE_LLC: return HAS_LLC(i915) ? " LLC" : " snooped";
	case I915_CACHE_L3_LLC: return " L3+LLC";
	case I915_CACHE_WT: return " WT";
	default: return "";
	}
}

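/*
 * Read a register that is replicated per slice/subslice: steer the shared
 * GEN8_MCR_SELECTOR to the requested slice/subslice (using the gen11+ or
 * gen8 field layout as appropriate), perform the read under forcewake, and
 * restore the previous steering before dropping the uncore lock.
 */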
static u32
read_subslice_reg(const struct intel_engine_cs *engine,
		  int slice, int subslice, i915_reg_t reg)
{
	struct drm_i915_private *i915 = engine->i915;
	struct intel_uncore *uncore = engine->uncore;
	u32 mcr_mask, mcr_ss, mcr, old_mcr, val;
	enum forcewake_domains fw_domains;

	if (INTEL_GEN(i915) >= 11) {
		mcr_mask = GEN11_MCR_SLICE_MASK | GEN11_MCR_SUBSLICE_MASK;
		mcr_ss = GEN11_MCR_SLICE(slice) | GEN11_MCR_SUBSLICE(subslice);
	} else {
		mcr_mask = GEN8_MCR_SLICE_MASK | GEN8_MCR_SUBSLICE_MASK;
		mcr_ss = GEN8_MCR_SLICE(slice) | GEN8_MCR_SUBSLICE(subslice);
	}

	fw_domains = intel_uncore_forcewake_for_reg(uncore, reg,
						    FW_REG_READ);
	fw_domains |= intel_uncore_forcewake_for_reg(uncore,
						     GEN8_MCR_SELECTOR,
						     FW_REG_READ | FW_REG_WRITE);

	spin_lock_irq(&uncore->lock);
	intel_uncore_forcewake_get__locked(uncore, fw_domains);

	old_mcr = mcr = intel_uncore_read_fw(uncore, GEN8_MCR_SELECTOR);

	mcr &= ~mcr_mask;
	mcr |= mcr_ss;
	intel_uncore_write_fw(uncore, GEN8_MCR_SELECTOR, mcr);

	val = intel_uncore_read_fw(uncore, reg);

	mcr &= ~mcr_mask;
	mcr |= old_mcr & mcr_mask;

	intel_uncore_write_fw(uncore, GEN8_MCR_SELECTOR, mcr);

	intel_uncore_forcewake_put__locked(uncore, fw_domains);
	spin_unlock_irq(&uncore->lock);

	return val;
}

/* NB: please notice the memset */
void intel_engine_get_instdone(const struct intel_engine_cs *engine,
			       struct intel_instdone *instdone)
{
	struct drm_i915_private *i915 = engine->i915;
	const struct sseu_dev_info *sseu = &engine->gt->info.sseu;
	struct intel_uncore *uncore = engine->uncore;
	u32 mmio_base = engine->mmio_base;
	int slice;
	int subslice;

	memset(instdone, 0, sizeof(*instdone));

	switch (INTEL_GEN(i915)) {
	default:
		instdone->instdone =
			intel_uncore_read(uncore, RING_INSTDONE(mmio_base));

		if (engine->id != RCS0)
			break;

		instdone->slice_common =
			intel_uncore_read(uncore, GEN7_SC_INSTDONE);
		if (INTEL_GEN(i915) >= 12) {
			instdone->slice_common_extra[0] =
				intel_uncore_read(uncore, GEN12_SC_INSTDONE_EXTRA);
			instdone->slice_common_extra[1] =
				intel_uncore_read(uncore, GEN12_SC_INSTDONE_EXTRA2);
		}
		for_each_instdone_slice_subslice(i915, sseu, slice, subslice) {
			instdone->sampler[slice][subslice] =
				read_subslice_reg(engine, slice, subslice,
						  GEN7_SAMPLER_INSTDONE);
			instdone->row[slice][subslice] =
				read_subslice_reg(engine, slice, subslice,
						  GEN7_ROW_INSTDONE);
		}
		break;
	case 7:
		instdone->instdone =
			intel_uncore_read(uncore, RING_INSTDONE(mmio_base));

		if (engine->id != RCS0)
			break;

		instdone->slice_common =
			intel_uncore_read(uncore, GEN7_SC_INSTDONE);
		instdone->sampler[0][0] =
			intel_uncore_read(uncore, GEN7_SAMPLER_INSTDONE);
		instdone->row[0][0] =
			intel_uncore_read(uncore, GEN7_ROW_INSTDONE);

		break;
	case 6:
	case 5:
	case 4:
		instdone->instdone =
			intel_uncore_read(uncore, RING_INSTDONE(mmio_base));
		if (engine->id == RCS0)
			/* HACK: Using the wrong struct member */
			instdone->slice_common =
				intel_uncore_read(uncore, GEN4_INSTDONE1);
		break;
	case 3:
	case 2:
		instdone->instdone = intel_uncore_read(uncore, GEN2_INSTDONE);
		break;
	}
}

CW
1213static bool ring_is_idle(struct intel_engine_cs *engine)
1214{
a091d4ee
CW
1215 bool idle = true;
1216
293f8c0f
CW
1217 if (I915_SELFTEST_ONLY(!engine->mmio_base))
1218 return true;
1219
4ecd20c9 1220 if (!intel_engine_pm_get_if_awake(engine))
74d00d28 1221 return true;
a091d4ee 1222
44f8b802 1223 /* First check that no commands are left in the ring */
baba6e57
DCS
1224 if ((ENGINE_READ(engine, RING_HEAD) & HEAD_ADDR) !=
1225 (ENGINE_READ(engine, RING_TAIL) & TAIL_ADDR))
44f8b802 1226 idle = false;
aed2fc10 1227
44f8b802 1228 /* No bit for gen2, so assume the CS parser is idle */
4ecd20c9 1229 if (INTEL_GEN(engine->i915) > 2 &&
baba6e57 1230 !(ENGINE_READ(engine, RING_MI_MODE) & MODE_IDLE))
a091d4ee
CW
1231 idle = false;
1232
4ecd20c9 1233 intel_engine_pm_put(engine);
a091d4ee
CW
1234
1235 return idle;
1236}
1237
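/*
 * Run the execlists tasklet directly on this CPU (rather than waiting for
 * ksoftirqd) so that pending CSB events and queued submissions are processed
 * before the caller inspects the engine. With @sync set, additionally wait
 * for a tasklet instance already running on another CPU to finish.
 */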
void __intel_engine_flush_submission(struct intel_engine_cs *engine, bool sync)
{
	struct tasklet_struct *t = &engine->execlists.tasklet;

	if (!t->func)
		return;

	local_bh_disable();
	if (tasklet_trylock(t)) {
		/* Must wait for any GPU reset in progress. */
		if (__tasklet_is_enabled(t))
			t->func(t->data);
		tasklet_unlock(t);
	}
	local_bh_enable();

	/* Synchronise and wait for the tasklet on another CPU */
	if (sync)
		tasklet_unlock_wait(t);
}

/**
 * intel_engine_is_idle() - Report if the engine has finished processing all work
 * @engine: the intel_engine_cs
 *
 * Return true if there are no requests pending, nothing left to be submitted
 * to hardware, and the engine is idle.
 */
bool intel_engine_is_idle(struct intel_engine_cs *engine)
{
	/* More white lies, if wedged, hw state is inconsistent */
	if (intel_gt_is_wedged(engine->gt))
		return true;

	if (!intel_engine_pm_is_awake(engine))
		return true;

	/* Waiting to drain ELSP? */
	if (execlists_active(&engine->execlists)) {
		synchronize_hardirq(engine->i915->drm.pdev->irq);

		intel_engine_flush_submission(engine);

		if (execlists_active(&engine->execlists))
			return false;
	}

	/* ELSP is empty, but there are ready requests? E.g. after reset */
	if (!RB_EMPTY_ROOT(&engine->execlists.queue.rb_root))
		return false;

	/* Ring stopped? */
	return ring_is_idle(engine);
}

bool intel_engines_are_idle(struct intel_gt *gt)
{
	struct intel_engine_cs *engine;
	enum intel_engine_id id;

	/*
	 * If the driver is wedged, HW state may be very inconsistent and
	 * report that it is still busy, even though we have stopped using it.
	 */
	if (intel_gt_is_wedged(gt))
		return true;

	/* Already parked (and passed an idleness test); must still be idle */
	if (!READ_ONCE(gt->awake))
		return true;

	for_each_engine(engine, gt, id) {
		if (!intel_engine_is_idle(engine))
			return false;
	}

	return true;
}

void intel_engines_reset_default_submission(struct intel_gt *gt)
{
	struct intel_engine_cs *engine;
	enum intel_engine_id id;

	for_each_engine(engine, gt, id) {
		if (engine->sanitize)
			engine->sanitize(engine);

		engine->set_default_submission(engine);
	}
}

bool intel_engine_can_store_dword(struct intel_engine_cs *engine)
{
	switch (INTEL_GEN(engine->i915)) {
	case 2:
		return false; /* uses physical not virtual addresses */
	case 3:
		/* maybe only uses physical not virtual addresses */
		return !(IS_I915G(engine->i915) || IS_I915GM(engine->i915));
	case 4:
		return !IS_I965G(engine->i915); /* who knows! */
	case 6:
		return engine->class != VIDEO_DECODE_CLASS; /* b0rked */
	default:
		return true;
	}
}

static struct intel_timeline *get_timeline(struct i915_request *rq)
{
	struct intel_timeline *tl;

	/*
	 * Even though we are holding the engine->active.lock here, there
	 * is no control over the submission queue per-se and we are
	 * inspecting the active state at a random point in time, with an
	 * unknown queue. Play safe and make sure the timeline remains valid.
	 * (Only being used for pretty printing, one extra kref shouldn't
	 * cause a camel stampede!)
	 */
	rcu_read_lock();
	tl = rcu_dereference(rq->timeline);
	if (!kref_get_unless_zero(&tl->kref))
		tl = NULL;
	rcu_read_unlock();

	return tl;
}

static int print_ring(char *buf, int sz, struct i915_request *rq)
{
	int len = 0;

	if (!i915_request_signaled(rq)) {
		struct intel_timeline *tl = get_timeline(rq);

		len = scnprintf(buf, sz,
				"ring:{start:%08x, hwsp:%08x, seqno:%08x, runtime:%llums}, ",
				i915_ggtt_offset(rq->ring->vma),
				tl ? tl->hwsp_offset : 0,
				hwsp_seqno(rq),
				DIV_ROUND_CLOSEST_ULL(intel_context_get_total_runtime_ns(rq->context),
						      1000 * 1000));

		if (tl)
			intel_timeline_put(tl);
	}

	return len;
}

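/*
 * Dump a buffer as rows of hex words in the style of hexdump(1): consecutive
 * identical rows are collapsed into a single "*" line to keep large,
 * repetitive regions readable.
 */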
static void hexdump(struct drm_printer *m, const void *buf, size_t len)
{
	const size_t rowsize = 8 * sizeof(u32);
	const void *prev = NULL;
	bool skip = false;
	size_t pos;

	for (pos = 0; pos < len; pos += rowsize) {
		char line[128];

		if (prev && !memcmp(prev, buf + pos, rowsize)) {
			if (!skip) {
				drm_printf(m, "*\n");
				skip = true;
			}
			continue;
		}

		WARN_ON_ONCE(hex_dump_to_buffer(buf + pos, len - pos,
						rowsize, sizeof(u32),
						line, sizeof(line),
						false) >= sizeof(line));
		drm_printf(m, "[%04zx] %s\n", pos, line);

		prev = buf + pos;
		skip = false;
	}
}

static const char *repr_timer(const struct timer_list *t)
{
	if (!READ_ONCE(t->expires))
		return "inactive";

	if (timer_pending(t))
		return "active";

	return "expired";
}

static void intel_engine_print_registers(struct intel_engine_cs *engine,
					 struct drm_printer *m)
{
	struct drm_i915_private *dev_priv = engine->i915;
	struct intel_engine_execlists * const execlists = &engine->execlists;
	u64 addr;

	if (engine->id == RENDER_CLASS && IS_GEN_RANGE(dev_priv, 4, 7))
		drm_printf(m, "\tCCID: 0x%08x\n", ENGINE_READ(engine, CCID));
	if (HAS_EXECLISTS(dev_priv)) {
		drm_printf(m, "\tEL_STAT_HI: 0x%08x\n",
			   ENGINE_READ(engine, RING_EXECLIST_STATUS_HI));
		drm_printf(m, "\tEL_STAT_LO: 0x%08x\n",
			   ENGINE_READ(engine, RING_EXECLIST_STATUS_LO));
	}
	drm_printf(m, "\tRING_START: 0x%08x\n",
		   ENGINE_READ(engine, RING_START));
	drm_printf(m, "\tRING_HEAD: 0x%08x\n",
		   ENGINE_READ(engine, RING_HEAD) & HEAD_ADDR);
	drm_printf(m, "\tRING_TAIL: 0x%08x\n",
		   ENGINE_READ(engine, RING_TAIL) & TAIL_ADDR);
	drm_printf(m, "\tRING_CTL: 0x%08x%s\n",
		   ENGINE_READ(engine, RING_CTL),
		   ENGINE_READ(engine, RING_CTL) & (RING_WAIT | RING_WAIT_SEMAPHORE) ? " [waiting]" : "");
	if (INTEL_GEN(engine->i915) > 2) {
		drm_printf(m, "\tRING_MODE: 0x%08x%s\n",
			   ENGINE_READ(engine, RING_MI_MODE),
			   ENGINE_READ(engine, RING_MI_MODE) & (MODE_IDLE) ? " [idle]" : "");
	}

	if (INTEL_GEN(dev_priv) >= 6) {
		drm_printf(m, "\tRING_IMR: 0x%08x\n",
			   ENGINE_READ(engine, RING_IMR));
		drm_printf(m, "\tRING_ESR: 0x%08x\n",
			   ENGINE_READ(engine, RING_ESR));
		drm_printf(m, "\tRING_EMR: 0x%08x\n",
			   ENGINE_READ(engine, RING_EMR));
		drm_printf(m, "\tRING_EIR: 0x%08x\n",
			   ENGINE_READ(engine, RING_EIR));
	}

	addr = intel_engine_get_active_head(engine);
	drm_printf(m, "\tACTHD: 0x%08x_%08x\n",
		   upper_32_bits(addr), lower_32_bits(addr));
	addr = intel_engine_get_last_batch_head(engine);
	drm_printf(m, "\tBBADDR: 0x%08x_%08x\n",
		   upper_32_bits(addr), lower_32_bits(addr));
	if (INTEL_GEN(dev_priv) >= 8)
		addr = ENGINE_READ64(engine, RING_DMA_FADD, RING_DMA_FADD_UDW);
	else if (INTEL_GEN(dev_priv) >= 4)
		addr = ENGINE_READ(engine, RING_DMA_FADD);
	else
		addr = ENGINE_READ(engine, DMA_FADD_I8XX);
	drm_printf(m, "\tDMA_FADDR: 0x%08x_%08x\n",
		   upper_32_bits(addr), lower_32_bits(addr));
	if (INTEL_GEN(dev_priv) >= 4) {
		drm_printf(m, "\tIPEIR: 0x%08x\n",
			   ENGINE_READ(engine, RING_IPEIR));
		drm_printf(m, "\tIPEHR: 0x%08x\n",
			   ENGINE_READ(engine, RING_IPEHR));
	} else {
		drm_printf(m, "\tIPEIR: 0x%08x\n", ENGINE_READ(engine, IPEIR));
		drm_printf(m, "\tIPEHR: 0x%08x\n", ENGINE_READ(engine, IPEHR));
	}

	if (intel_engine_in_guc_submission_mode(engine)) {
		/* nothing to print yet */
	} else if (HAS_EXECLISTS(dev_priv)) {
		struct i915_request * const *port, *rq;
		const u32 *hws =
			&engine->status_page.addr[I915_HWS_CSB_BUF0_INDEX];
		const u8 num_entries = execlists->csb_size;
		unsigned int idx;
		u8 read, write;

		drm_printf(m, "\tExeclist tasklet queued? %s (%s), preempt? %s, timeslice? %s\n",
			   yesno(test_bit(TASKLET_STATE_SCHED,
					  &engine->execlists.tasklet.state)),
			   enableddisabled(!atomic_read(&engine->execlists.tasklet.count)),
			   repr_timer(&engine->execlists.preempt),
			   repr_timer(&engine->execlists.timer));

		read = execlists->csb_head;
		write = READ_ONCE(*execlists->csb_write);

		drm_printf(m, "\tExeclist status: 0x%08x %08x; CSB read:%d, write:%d, entries:%d\n",
			   ENGINE_READ(engine, RING_EXECLIST_STATUS_LO),
			   ENGINE_READ(engine, RING_EXECLIST_STATUS_HI),
			   read, write, num_entries);

		if (read >= num_entries)
			read = 0;
		if (write >= num_entries)
			write = 0;
		if (read > write)
			write += num_entries;
		while (read < write) {
			idx = ++read % num_entries;
			drm_printf(m, "\tExeclist CSB[%d]: 0x%08x, context: %d\n",
				   idx, hws[idx * 2], hws[idx * 2 + 1]);
		}

		execlists_active_lock_bh(execlists);
		rcu_read_lock();
		for (port = execlists->active; (rq = *port); port++) {
			char hdr[160];
			int len;

			len = scnprintf(hdr, sizeof(hdr),
					"\t\tActive[%d]: ccid:%08x%s%s, ",
					(int)(port - execlists->active),
					rq->context->lrc.ccid,
					intel_context_is_closed(rq->context) ? "!" : "",
					intel_context_is_banned(rq->context) ? "*" : "");
			len += print_ring(hdr + len, sizeof(hdr) - len, rq);
			scnprintf(hdr + len, sizeof(hdr) - len, "rq: ");
			i915_request_show(m, rq, hdr, 0);
		}
		for (port = execlists->pending; (rq = *port); port++) {
			char hdr[160];
			int len;

			len = scnprintf(hdr, sizeof(hdr),
					"\t\tPending[%d]: ccid:%08x%s%s, ",
					(int)(port - execlists->pending),
					rq->context->lrc.ccid,
					intel_context_is_closed(rq->context) ? "!" : "",
					intel_context_is_banned(rq->context) ? "*" : "");
			len += print_ring(hdr + len, sizeof(hdr) - len, rq);
			scnprintf(hdr + len, sizeof(hdr) - len, "rq: ");
			i915_request_show(m, rq, hdr, 0);
		}
		rcu_read_unlock();
		execlists_active_unlock_bh(execlists);
	} else if (INTEL_GEN(dev_priv) > 6) {
		drm_printf(m, "\tPP_DIR_BASE: 0x%08x\n",
			   ENGINE_READ(engine, RING_PP_DIR_BASE));
		drm_printf(m, "\tPP_DIR_BASE_READ: 0x%08x\n",
			   ENGINE_READ(engine, RING_PP_DIR_BASE_READ));
		drm_printf(m, "\tPP_DIR_DCLV: 0x%08x\n",
			   ENGINE_READ(engine, RING_PP_DIR_DCLV));
	}
}

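/*
 * Copy the request's portion of the ring ([head, tail), handling wrap-around)
 * into a temporary buffer and hexdump it together with the batch address, so
 * the commands the request emitted appear in the engine dump.
 */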
static void print_request_ring(struct drm_printer *m, struct i915_request *rq)
{
	void *ring;
	int size;

	drm_printf(m,
		   "[head %04x, postfix %04x, tail %04x, batch 0x%08x_%08x]:\n",
		   rq->head, rq->postfix, rq->tail,
		   rq->batch ? upper_32_bits(rq->batch->node.start) : ~0u,
		   rq->batch ? lower_32_bits(rq->batch->node.start) : ~0u);

	size = rq->tail - rq->head;
	if (rq->tail < rq->head)
		size += rq->ring->size;

	ring = kmalloc(size, GFP_ATOMIC);
	if (ring) {
		const void *vaddr = rq->ring->vaddr;
		unsigned int head = rq->head;
		unsigned int len = 0;

		if (rq->tail < head) {
			len = rq->ring->size - head;
			memcpy(ring, vaddr + head, len);
			head = 0;
		}
		memcpy(ring + len, vaddr + head, size - len);

		hexdump(m, ring, size);
		kfree(ring);
	}
}

static unsigned long list_count(struct list_head *list)
{
	struct list_head *pos;
	unsigned long count = 0;

	list_for_each(pos, list)
		count++;

	return count;
}

static unsigned long read_ul(void *p, size_t x)
{
	return *(unsigned long *)(p + x);
}

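/*
 * The P() table below records the name and byte offset of each engine tunable
 * so that the current value in engine->props and the boot-time default kept
 * in engine->defaults can both be printed generically via read_ul().
 */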
static void print_properties(struct intel_engine_cs *engine,
			     struct drm_printer *m)
{
	static const struct pmap {
		size_t offset;
		const char *name;
	} props[] = {
#define P(x) { \
		.offset = offsetof(typeof(engine->props), x), \
		.name = #x \
}
		P(heartbeat_interval_ms),
		P(max_busywait_duration_ns),
		P(preempt_timeout_ms),
		P(stop_timeout_ms),
		P(timeslice_duration_ms),

		{},
#undef P
	};
	const struct pmap *p;

	drm_printf(m, "\tProperties:\n");
	for (p = props; p->name; p++)
		drm_printf(m, "\t\t%s: %lu [default %lu]\n",
			   p->name,
			   read_ul(&engine->props, p->offset),
			   read_ul(&engine->defaults, p->offset));
}

void intel_engine_dump(struct intel_engine_cs *engine,
		       struct drm_printer *m,
		       const char *header, ...)
{
	struct i915_gpu_error * const error = &engine->i915->gpu_error;
	struct i915_request *rq;
	intel_wakeref_t wakeref;
	unsigned long flags;
	ktime_t dummy;

	if (header) {
		va_list ap;

		va_start(ap, header);
		drm_vprintf(m, header, &ap);
		va_end(ap);
	}

	if (intel_gt_is_wedged(engine->gt))
		drm_printf(m, "*** WEDGED ***\n");

	drm_printf(m, "\tAwake? %d\n", atomic_read(&engine->wakeref.count));
	drm_printf(m, "\tBarriers?: %s\n",
		   yesno(!llist_empty(&engine->barrier_tasks)));
	drm_printf(m, "\tLatency: %luus\n",
		   ewma__engine_latency_read(&engine->latency));
	if (intel_engine_supports_stats(engine))
		drm_printf(m, "\tRuntime: %llums\n",
			   ktime_to_ms(intel_engine_get_busy_time(engine,
								  &dummy)));
	drm_printf(m, "\tForcewake: %x domains, %d active\n",
		   engine->fw_domain, READ_ONCE(engine->fw_active));

	rcu_read_lock();
	rq = READ_ONCE(engine->heartbeat.systole);
	if (rq)
		drm_printf(m, "\tHeartbeat: %d ms ago\n",
			   jiffies_to_msecs(jiffies - rq->emitted_jiffies));
	rcu_read_unlock();
	drm_printf(m, "\tReset count: %d (global %d)\n",
		   i915_reset_engine_count(error, engine),
		   i915_reset_count(error));
	print_properties(engine, m);

	drm_printf(m, "\tRequests:\n");

	spin_lock_irqsave(&engine->active.lock, flags);
	rq = intel_engine_find_active_request(engine);
	if (rq) {
		struct intel_timeline *tl = get_timeline(rq);

		i915_request_show(m, rq, "\t\tactive ", 0);

		drm_printf(m, "\t\tring->start: 0x%08x\n",
			   i915_ggtt_offset(rq->ring->vma));
		drm_printf(m, "\t\tring->head: 0x%08x\n",
			   rq->ring->head);
		drm_printf(m, "\t\tring->tail: 0x%08x\n",
			   rq->ring->tail);
		drm_printf(m, "\t\tring->emit: 0x%08x\n",
			   rq->ring->emit);
		drm_printf(m, "\t\tring->space: 0x%08x\n",
			   rq->ring->space);

		if (tl) {
			drm_printf(m, "\t\tring->hwsp: 0x%08x\n",
				   tl->hwsp_offset);
			intel_timeline_put(tl);
		}

		print_request_ring(m, rq);

		if (rq->context->lrc_reg_state) {
			drm_printf(m, "Logical Ring Context:\n");
			hexdump(m, rq->context->lrc_reg_state, PAGE_SIZE);
		}
	}
	drm_printf(m, "\tOn hold?: %lu\n", list_count(&engine->active.hold));
	spin_unlock_irqrestore(&engine->active.lock, flags);

	drm_printf(m, "\tMMIO base: 0x%08x\n", engine->mmio_base);
	wakeref = intel_runtime_pm_get_if_in_use(engine->uncore->rpm);
	if (wakeref) {
		intel_engine_print_registers(engine, m);
		intel_runtime_pm_put(engine->uncore->rpm, wakeref);
	} else {
		drm_printf(m, "\tDevice is asleep; skipping register dump\n");
	}

	intel_execlists_show_requests(engine, m, i915_request_show, 8);

	drm_printf(m, "HWSP:\n");
	hexdump(m, engine->status_page.addr, PAGE_SIZE);

	drm_printf(m, "Idle? %s\n", yesno(intel_engine_is_idle(engine)));

	intel_engine_print_breadcrumbs(engine, m);
}

static ktime_t __intel_engine_get_busy_time(struct intel_engine_cs *engine,
					    ktime_t *now)
{
	ktime_t total = engine->stats.total;

	/*
	 * If the engine is executing something at the moment
	 * add it to the total.
	 */
	*now = ktime_get();
	if (READ_ONCE(engine->stats.active))
		total = ktime_add(total, ktime_sub(*now, engine->stats.start));

	return total;
}

/**
 * intel_engine_get_busy_time() - Return current accumulated engine busyness
 * @engine: engine to report on
 * @now: monotonic timestamp of sampling
 *
 * Returns accumulated time @engine was busy since engine stats were enabled.
 */
ktime_t intel_engine_get_busy_time(struct intel_engine_cs *engine, ktime_t *now)
{
	unsigned int seq;
	ktime_t total;

	do {
		seq = read_seqcount_begin(&engine->stats.lock);
		total = __intel_engine_get_busy_time(engine, now);
	} while (read_seqcount_retry(&engine->stats.lock, seq));

	return total;
}

static bool match_ring(struct i915_request *rq)
{
	u32 ring = ENGINE_READ(rq->engine, RING_START);

	return ring == i915_ggtt_offset(rq->ring->vma);
}

struct i915_request *
intel_engine_find_active_request(struct intel_engine_cs *engine)
{
	struct i915_request *request, *active = NULL;

	/*
	 * We are called by the error capture, reset and to dump engine
	 * state at random points in time. In particular, note that none of
	 * these is crucially ordered with an interrupt. After a hang, the GPU
	 * is dead and we assume that no more writes can happen (we waited
	 * long enough for all writes that were in transaction to be flushed) -
	 * adding an extra delay for a recent interrupt is pointless. Hence, we
	 * do not need an engine->irq_seqno_barrier() before the seqno reads.
	 * At all other times, we must assume the GPU is still running, but
	 * we only care about the snapshot of this moment.
	 */
	lockdep_assert_held(&engine->active.lock);

	rcu_read_lock();
	request = execlists_active(&engine->execlists);
	if (request) {
		struct intel_timeline *tl = request->context->timeline;

		list_for_each_entry_from_reverse(request, &tl->requests, link) {
			if (__i915_request_is_complete(request))
				break;

			active = request;
		}
	}
	rcu_read_unlock();
	if (active)
		return active;

	list_for_each_entry(request, &engine->active.requests, sched.link) {
		if (__i915_request_is_complete(request))
			continue;

		if (!__i915_request_has_started(request))
			continue;

		/* More than one preemptible request may match! */
		if (!match_ring(request))
			continue;

		active = request;
		break;
	}

	return active;
}

#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "mock_engine.c"
#include "selftest_engine.c"
#include "selftest_engine_cs.c"
#endif