1// SPDX-License-Identifier: MIT
2/*
3 * Copyright © 2022 Intel Corporation
4 */
5
6#include <linux/bitfield.h>
7#include <linux/bitmap.h>
8#include <linux/circ_buf.h>
9#include <linux/delay.h>
10#include <linux/dma-fence-array.h>
11
12#include <drm/drm_managed.h>
13
14#include "xe_device.h"
15#include "xe_engine.h"
16#include "xe_guc.h"
17#include "xe_guc_ct.h"
18#include "xe_guc_engine_types.h"
19#include "xe_guc_submit.h"
20#include "xe_gt.h"
21#include "xe_force_wake.h"
22#include "xe_gpu_scheduler.h"
23#include "xe_hw_engine.h"
24#include "xe_hw_fence.h"
25#include "xe_lrc.h"
26#include "xe_macros.h"
27#include "xe_map.h"
28#include "xe_mocs.h"
29#include "xe_ring_ops_types.h"
30#include "xe_sched_job.h"
31#include "xe_trace.h"
32#include "xe_vm.h"
33
34#include "gt/intel_lrc_reg.h"
35
36static struct xe_gt *
37guc_to_gt(struct xe_guc *guc)
38{
39 return container_of(guc, struct xe_gt, uc.guc);
40}
41
42static struct xe_device *
43guc_to_xe(struct xe_guc *guc)
44{
45 return gt_to_xe(guc_to_gt(guc));
46}
47
48static struct xe_guc *
49engine_to_guc(struct xe_engine *e)
50{
51 return &e->gt->uc.guc;
52}
53
54/*
55 * Helpers for engine state, using an atomic as some of the bits can transition
 56 * at the same time (e.g. a suspend can be happening at the same time as a
 57 * schedule engine done being processed).
58 */
59#define ENGINE_STATE_REGISTERED (1 << 0)
60#define ENGINE_STATE_ENABLED (1 << 1)
61#define ENGINE_STATE_PENDING_ENABLE (1 << 2)
62#define ENGINE_STATE_PENDING_DISABLE (1 << 3)
63#define ENGINE_STATE_DESTROYED (1 << 4)
64#define ENGINE_STATE_SUSPENDED (1 << 5)
65#define ENGINE_STATE_RESET (1 << 6)
66#define ENGINE_STATE_KILLED (1 << 7)
67
68static bool engine_registered(struct xe_engine *e)
69{
70 return atomic_read(&e->guc->state) & ENGINE_STATE_REGISTERED;
71}
72
73static void set_engine_registered(struct xe_engine *e)
74{
75 atomic_or(ENGINE_STATE_REGISTERED, &e->guc->state);
76}
77
78static void clear_engine_registered(struct xe_engine *e)
79{
80 atomic_and(~ENGINE_STATE_REGISTERED, &e->guc->state);
81}
82
83static bool engine_enabled(struct xe_engine *e)
84{
85 return atomic_read(&e->guc->state) & ENGINE_STATE_ENABLED;
86}
87
88static void set_engine_enabled(struct xe_engine *e)
89{
90 atomic_or(ENGINE_STATE_ENABLED, &e->guc->state);
91}
92
93static void clear_engine_enabled(struct xe_engine *e)
94{
95 atomic_and(~ENGINE_STATE_ENABLED, &e->guc->state);
96}
97
98static bool engine_pending_enable(struct xe_engine *e)
99{
100 return atomic_read(&e->guc->state) & ENGINE_STATE_PENDING_ENABLE;
101}
102
103static void set_engine_pending_enable(struct xe_engine *e)
104{
105 atomic_or(ENGINE_STATE_PENDING_ENABLE, &e->guc->state);
106}
107
108static void clear_engine_pending_enable(struct xe_engine *e)
109{
110 atomic_and(~ENGINE_STATE_PENDING_ENABLE, &e->guc->state);
111}
112
113static bool engine_pending_disable(struct xe_engine *e)
114{
115 return atomic_read(&e->guc->state) & ENGINE_STATE_PENDING_DISABLE;
116}
117
118static void set_engine_pending_disable(struct xe_engine *e)
119{
120 atomic_or(ENGINE_STATE_PENDING_DISABLE, &e->guc->state);
121}
122
123static void clear_engine_pending_disable(struct xe_engine *e)
124{
125 atomic_and(~ENGINE_STATE_PENDING_DISABLE, &e->guc->state);
126}
127
128static bool engine_destroyed(struct xe_engine *e)
129{
130 return atomic_read(&e->guc->state) & ENGINE_STATE_DESTROYED;
131}
132
133static void set_engine_destroyed(struct xe_engine *e)
134{
135 atomic_or(ENGINE_STATE_DESTROYED, &e->guc->state);
136}
137
138static bool engine_banned(struct xe_engine *e)
139{
140 return (e->flags & ENGINE_FLAG_BANNED);
141}
142
143static void set_engine_banned(struct xe_engine *e)
144{
145 e->flags |= ENGINE_FLAG_BANNED;
146}
147
148static bool engine_suspended(struct xe_engine *e)
149{
150 return atomic_read(&e->guc->state) & ENGINE_STATE_SUSPENDED;
151}
152
153static void set_engine_suspended(struct xe_engine *e)
154{
155 atomic_or(ENGINE_STATE_SUSPENDED, &e->guc->state);
156}
157
158static void clear_engine_suspended(struct xe_engine *e)
159{
160 atomic_and(~ENGINE_STATE_SUSPENDED, &e->guc->state);
161}
162
163static bool engine_reset(struct xe_engine *e)
164{
165 return atomic_read(&e->guc->state) & ENGINE_STATE_RESET;
166}
167
168static void set_engine_reset(struct xe_engine *e)
169{
170 atomic_or(ENGINE_STATE_RESET, &e->guc->state);
171}
172
173static bool engine_killed(struct xe_engine *e)
174{
175 return atomic_read(&e->guc->state) & ENGINE_STATE_KILLED;
176}
177
178static void set_engine_killed(struct xe_engine *e)
179{
180 atomic_or(ENGINE_STATE_KILLED, &e->guc->state);
181}
182
183static bool engine_killed_or_banned(struct xe_engine *e)
184{
185 return engine_killed(e) || engine_banned(e);
186}
187
188static void guc_submit_fini(struct drm_device *drm, void *arg)
189{
190 struct xe_guc *guc = arg;
191
192 xa_destroy(&guc->submission_state.engine_lookup);
193 ida_destroy(&guc->submission_state.guc_ids);
194 bitmap_free(guc->submission_state.guc_ids_bitmap);
195}
196
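/*
 * The GuC context ID space is split in two: single-LRC engines get their IDs
 * from an IDA in [0, GUC_ID_NUMBER_SLRC), while parallel (multi-LRC) engines
 * carve contiguous ranges out of a bitmap covering the top GUC_ID_NUMBER_MLRC
 * IDs, offset by GUC_ID_START_MLRC.
 */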
197#define GUC_ID_MAX 65535
198#define GUC_ID_NUMBER_MLRC 4096
199#define GUC_ID_NUMBER_SLRC (GUC_ID_MAX - GUC_ID_NUMBER_MLRC)
200#define GUC_ID_START_MLRC GUC_ID_NUMBER_SLRC
201
202static const struct xe_engine_ops guc_engine_ops;
203
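/*
 * Prime lockdep with the dependency between the submission locks and the
 * memory-reclaim (fs_reclaim) context, so potential inversions are reported
 * up front without needing real memory pressure.
 */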
204static void primelockdep(struct xe_guc *guc)
205{
206 if (!IS_ENABLED(CONFIG_LOCKDEP))
207 return;
208
209 fs_reclaim_acquire(GFP_KERNEL);
210
211 mutex_lock(&guc->submission_state.lock);
212 might_lock(&guc->submission_state.suspend.lock);
213 mutex_unlock(&guc->submission_state.lock);
214
215 fs_reclaim_release(GFP_KERNEL);
216}
217
218int xe_guc_submit_init(struct xe_guc *guc)
219{
220 struct xe_device *xe = guc_to_xe(guc);
221 struct xe_gt *gt = guc_to_gt(guc);
222 int err;
223
224 guc->submission_state.guc_ids_bitmap =
225 bitmap_zalloc(GUC_ID_NUMBER_MLRC, GFP_KERNEL);
226 if (!guc->submission_state.guc_ids_bitmap)
227 return -ENOMEM;
228
229 gt->engine_ops = &guc_engine_ops;
230
231 mutex_init(&guc->submission_state.lock);
232 xa_init(&guc->submission_state.engine_lookup);
233 ida_init(&guc->submission_state.guc_ids);
234
235 spin_lock_init(&guc->submission_state.suspend.lock);
236 guc->submission_state.suspend.context = dma_fence_context_alloc(1);
237
238 primelockdep(guc);
239
240 err = drmm_add_action_or_reset(&xe->drm, guc_submit_fini, guc);
241 if (err)
242 return err;
243
244 return 0;
245}
246
247static int alloc_guc_id(struct xe_guc *guc, struct xe_engine *e)
248{
249 int ret;
250 void *ptr;
251
252 /*
253 * Must use GFP_NOWAIT as this lock is in the dma fence signalling path,
 254 * worst case the user gets -ENOMEM on engine create and has to try again.
 255 *
 256 * FIXME: Have caller pre-alloc or post-alloc with GFP_KERNEL to prevent
257 * failure.
258 */
259 lockdep_assert_held(&guc->submission_state.lock);
260
261 if (xe_engine_is_parallel(e)) {
262 void *bitmap = guc->submission_state.guc_ids_bitmap;
263
264 ret = bitmap_find_free_region(bitmap, GUC_ID_NUMBER_MLRC,
265 order_base_2(e->width));
266 } else {
267 ret = ida_simple_get(&guc->submission_state.guc_ids, 0,
268 GUC_ID_NUMBER_SLRC, GFP_NOWAIT);
269 }
270 if (ret < 0)
271 return ret;
272
273 e->guc->id = ret;
274 if (xe_engine_is_parallel(e))
275 e->guc->id += GUC_ID_START_MLRC;
276
277 ptr = xa_store(&guc->submission_state.engine_lookup,
278 e->guc->id, e, GFP_NOWAIT);
279 if (IS_ERR(ptr)) {
280 ret = PTR_ERR(ptr);
281 goto err_release;
282 }
283
284 return 0;
285
286err_release:
287 ida_simple_remove(&guc->submission_state.guc_ids, e->guc->id);
288 return ret;
289}
290
291static void release_guc_id(struct xe_guc *guc, struct xe_engine *e)
292{
293 mutex_lock(&guc->submission_state.lock);
294 xa_erase(&guc->submission_state.engine_lookup, e->guc->id);
295 if (xe_engine_is_parallel(e))
296 bitmap_release_region(guc->submission_state.guc_ids_bitmap,
297 e->guc->id - GUC_ID_START_MLRC,
298 order_base_2(e->width));
299 else
300 ida_simple_remove(&guc->submission_state.guc_ids, e->guc->id);
301 mutex_unlock(&guc->submission_state.lock);
302}
303
304struct engine_policy {
305 u32 count;
306 struct guc_update_engine_policy h2g;
307};
308
309static u32 __guc_engine_policy_action_size(struct engine_policy *policy)
310{
311 size_t bytes = sizeof(policy->h2g.header) +
312 (sizeof(policy->h2g.klv[0]) * policy->count);
313
314 return bytes / sizeof(u32);
315}
316
317static void __guc_engine_policy_start_klv(struct engine_policy *policy,
318 u16 guc_id)
319{
320 policy->h2g.header.action =
321 XE_GUC_ACTION_HOST2GUC_UPDATE_CONTEXT_POLICIES;
322 policy->h2g.header.guc_id = guc_id;
323 policy->count = 0;
324}
325
326#define MAKE_ENGINE_POLICY_ADD(func, id) \
327static void __guc_engine_policy_add_##func(struct engine_policy *policy, \
328 u32 data) \
329{ \
330 XE_BUG_ON(policy->count >= GUC_CONTEXT_POLICIES_KLV_NUM_IDS); \
331 \
332 policy->h2g.klv[policy->count].kl = \
333 FIELD_PREP(GUC_KLV_0_KEY, \
334 GUC_CONTEXT_POLICIES_KLV_ID_##id) | \
335 FIELD_PREP(GUC_KLV_0_LEN, 1); \
336 policy->h2g.klv[policy->count].value = data; \
337 policy->count++; \
338}
339
340MAKE_ENGINE_POLICY_ADD(execution_quantum, EXECUTION_QUANTUM)
341MAKE_ENGINE_POLICY_ADD(preemption_timeout, PREEMPTION_TIMEOUT)
342MAKE_ENGINE_POLICY_ADD(priority, SCHEDULING_PRIORITY)
343#undef MAKE_ENGINE_POLICY_ADD
344
345static const int xe_engine_prio_to_guc[] = {
346 [XE_ENGINE_PRIORITY_LOW] = GUC_CLIENT_PRIORITY_NORMAL,
347 [XE_ENGINE_PRIORITY_NORMAL] = GUC_CLIENT_PRIORITY_KMD_NORMAL,
348 [XE_ENGINE_PRIORITY_HIGH] = GUC_CLIENT_PRIORITY_HIGH,
349 [XE_ENGINE_PRIORITY_KERNEL] = GUC_CLIENT_PRIORITY_KMD_HIGH,
350};
351
352static void init_policies(struct xe_guc *guc, struct xe_engine *e)
353{
354 struct engine_policy policy;
355 enum xe_engine_priority prio = e->priority;
356 u32 timeslice_us = e->sched_props.timeslice_us;
357 u32 preempt_timeout_us = e->sched_props.preempt_timeout_us;
358
359 XE_BUG_ON(!engine_registered(e));
360
361 __guc_engine_policy_start_klv(&policy, e->guc->id);
362 __guc_engine_policy_add_priority(&policy, xe_engine_prio_to_guc[prio]);
363 __guc_engine_policy_add_execution_quantum(&policy, timeslice_us);
364 __guc_engine_policy_add_preemption_timeout(&policy, preempt_timeout_us);
365
366 xe_guc_ct_send(&guc->ct, (u32 *)&policy.h2g,
367 __guc_engine_policy_action_size(&policy), 0, 0);
368}
369
370static void set_min_preemption_timeout(struct xe_guc *guc, struct xe_engine *e)
371{
372 struct engine_policy policy;
373
374 __guc_engine_policy_start_klv(&policy, e->guc->id);
375 __guc_engine_policy_add_preemption_timeout(&policy, 1);
376
377 xe_guc_ct_send(&guc->ct, (u32 *)&policy.h2g,
378 __guc_engine_policy_action_size(&policy), 0, 0);
379}
380
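/*
 * Layout of the per-engine parallel scratch page: the first half holds the
 * GuC work queue descriptor plus the go/join semaphores, the second half
 * (WQ_SIZE bytes at WQ_OFFSET) is the work queue ring itself.
 */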
381#define PARALLEL_SCRATCH_SIZE 2048
382#define WQ_SIZE (PARALLEL_SCRATCH_SIZE / 2)
383#define WQ_OFFSET (PARALLEL_SCRATCH_SIZE - WQ_SIZE)
384#define CACHELINE_BYTES 64
385
386struct sync_semaphore {
387 u32 semaphore;
388 u8 unused[CACHELINE_BYTES - sizeof(u32)];
389};
390
391struct parallel_scratch {
392 struct guc_sched_wq_desc wq_desc;
393
394 struct sync_semaphore go;
395 struct sync_semaphore join[XE_HW_ENGINE_MAX_INSTANCE];
396
397 u8 unused[WQ_OFFSET - sizeof(struct guc_sched_wq_desc) -
398 sizeof(struct sync_semaphore) * (XE_HW_ENGINE_MAX_INSTANCE + 1)];
399
400 u32 wq[WQ_SIZE / sizeof(u32)];
401};
402
403#define parallel_read(xe_, map_, field_) \
404 xe_map_rd_field(xe_, &map_, 0, struct parallel_scratch, field_)
405#define parallel_write(xe_, map_, field_, val_) \
406 xe_map_wr_field(xe_, &map_, 0, struct parallel_scratch, field_, val_)
407
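/*
 * Register a parallel (multi-LRC) context with the GuC: the registration info
 * is followed by the LRC descriptors of the remaining width - 1 LRCs.
 */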
408static void __register_mlrc_engine(struct xe_guc *guc,
409 struct xe_engine *e,
410 struct guc_ctxt_registration_info *info)
411{
412#define MAX_MLRC_REG_SIZE (13 + XE_HW_ENGINE_MAX_INSTANCE * 2)
413 u32 action[MAX_MLRC_REG_SIZE];
414 int len = 0;
415 int i;
416
417 XE_BUG_ON(!xe_engine_is_parallel(e));
418
419 action[len++] = XE_GUC_ACTION_REGISTER_CONTEXT_MULTI_LRC;
420 action[len++] = info->flags;
421 action[len++] = info->context_idx;
422 action[len++] = info->engine_class;
423 action[len++] = info->engine_submit_mask;
424 action[len++] = info->wq_desc_lo;
425 action[len++] = info->wq_desc_hi;
426 action[len++] = info->wq_base_lo;
427 action[len++] = info->wq_base_hi;
428 action[len++] = info->wq_size;
429 action[len++] = e->width;
430 action[len++] = info->hwlrca_lo;
431 action[len++] = info->hwlrca_hi;
432
433 for (i = 1; i < e->width; ++i) {
434 struct xe_lrc *lrc = e->lrc + i;
435
436 action[len++] = lower_32_bits(xe_lrc_descriptor(lrc));
437 action[len++] = upper_32_bits(xe_lrc_descriptor(lrc));
438 }
439
440 XE_BUG_ON(len > MAX_MLRC_REG_SIZE);
441#undef MAX_MLRC_REG_SIZE
442
443 xe_guc_ct_send(&guc->ct, action, len, 0, 0);
444}
445
446static void __register_engine(struct xe_guc *guc,
447 struct guc_ctxt_registration_info *info)
448{
449 u32 action[] = {
450 XE_GUC_ACTION_REGISTER_CONTEXT,
451 info->flags,
452 info->context_idx,
453 info->engine_class,
454 info->engine_submit_mask,
455 info->wq_desc_lo,
456 info->wq_desc_hi,
457 info->wq_base_lo,
458 info->wq_base_hi,
459 info->wq_size,
460 info->hwlrca_lo,
461 info->hwlrca_hi,
462 };
463
464 xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action), 0, 0);
465}
466
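/*
 * Build the context registration info from the first LRC and hand it to the
 * GuC. For parallel engines the GuC is also pointed at the WQ descriptor and
 * ring inside the parallel scratch page; the descriptor half of the page is
 * zeroed and the WQ marked active.
 */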
467static void register_engine(struct xe_engine *e)
468{
469 struct xe_guc *guc = engine_to_guc(e);
470 struct xe_device *xe = guc_to_xe(guc);
471 struct xe_lrc *lrc = e->lrc;
472 struct guc_ctxt_registration_info info;
473
474 XE_BUG_ON(engine_registered(e));
475
476 memset(&info, 0, sizeof(info));
477 info.context_idx = e->guc->id;
478 info.engine_class = xe_engine_class_to_guc_class(e->class);
479 info.engine_submit_mask = e->logical_mask;
480 info.hwlrca_lo = lower_32_bits(xe_lrc_descriptor(lrc));
481 info.hwlrca_hi = upper_32_bits(xe_lrc_descriptor(lrc));
482 info.flags = CONTEXT_REGISTRATION_FLAG_KMD;
483
484 if (xe_engine_is_parallel(e)) {
485 u32 ggtt_addr = xe_lrc_parallel_ggtt_addr(lrc);
486 struct iosys_map map = xe_lrc_parallel_map(lrc);
487
488 info.wq_desc_lo = lower_32_bits(ggtt_addr +
489 offsetof(struct parallel_scratch, wq_desc));
490 info.wq_desc_hi = upper_32_bits(ggtt_addr +
491 offsetof(struct parallel_scratch, wq_desc));
492 info.wq_base_lo = lower_32_bits(ggtt_addr +
493 offsetof(struct parallel_scratch, wq[0]));
494 info.wq_base_hi = upper_32_bits(ggtt_addr +
495 offsetof(struct parallel_scratch, wq[0]));
496 info.wq_size = WQ_SIZE;
497
498 e->guc->wqi_head = 0;
499 e->guc->wqi_tail = 0;
500 xe_map_memset(xe, &map, 0, 0, PARALLEL_SCRATCH_SIZE - WQ_SIZE);
501 parallel_write(xe, map, wq_desc.wq_status, WQ_STATUS_ACTIVE);
502 }
503
504 set_engine_registered(e);
505 trace_xe_engine_register(e);
506 if (xe_engine_is_parallel(e))
507 __register_mlrc_engine(guc, e, &info);
508 else
509 __register_engine(guc, &info);
510 init_policies(guc, e);
511}
512
513static u32 wq_space_until_wrap(struct xe_engine *e)
514{
515 return (WQ_SIZE - e->guc->wqi_tail);
516}
517
518static int wq_wait_for_space(struct xe_engine *e, u32 wqi_size)
519{
520 struct xe_guc *guc = engine_to_guc(e);
521 struct xe_device *xe = guc_to_xe(guc);
522 struct iosys_map map = xe_lrc_parallel_map(e->lrc);
523 unsigned int sleep_period_ms = 1;
524
525#define AVAILABLE_SPACE \
526 CIRC_SPACE(e->guc->wqi_tail, e->guc->wqi_head, WQ_SIZE)
527 if (wqi_size > AVAILABLE_SPACE) {
528try_again:
529 e->guc->wqi_head = parallel_read(xe, map, wq_desc.head);
530 if (wqi_size > AVAILABLE_SPACE) {
531 if (sleep_period_ms == 1024) {
532 xe_gt_reset_async(e->gt);
533 return -ENODEV;
534 }
535
536 msleep(sleep_period_ms);
537 sleep_period_ms <<= 1;
538 goto try_again;
539 }
540 }
541#undef AVAILABLE_SPACE
542
543 return 0;
544}
545
546static int wq_noop_append(struct xe_engine *e)
547{
548 struct xe_guc *guc = engine_to_guc(e);
549 struct xe_device *xe = guc_to_xe(guc);
550 struct iosys_map map = xe_lrc_parallel_map(e->lrc);
551 u32 len_dw = wq_space_until_wrap(e) / sizeof(u32) - 1;
552
553 if (wq_wait_for_space(e, wq_space_until_wrap(e)))
554 return -ENODEV;
555
556 XE_BUG_ON(!FIELD_FIT(WQ_LEN_MASK, len_dw));
557
558 parallel_write(xe, map, wq[e->guc->wqi_tail / sizeof(u32)],
559 FIELD_PREP(WQ_TYPE_MASK, WQ_TYPE_NOOP) |
560 FIELD_PREP(WQ_LEN_MASK, len_dw));
561 e->guc->wqi_tail = 0;
562
563 return 0;
564}
565
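/*
 * A multi-LRC work queue item is (width + 3) dwords: a header (type/length),
 * the lead LRC descriptor, the guc_id plus lead ring tail, a reserved dword,
 * and the ring tails of the child LRCs. The item is written at wqi_tail and
 * then published to the GuC by updating wq_desc.tail.
 */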
566static void wq_item_append(struct xe_engine *e)
567{
568 struct xe_guc *guc = engine_to_guc(e);
569 struct xe_device *xe = guc_to_xe(guc);
570 struct iosys_map map = xe_lrc_parallel_map(e->lrc);
571 u32 wqi[XE_HW_ENGINE_MAX_INSTANCE + 3];
572 u32 wqi_size = (e->width + 3) * sizeof(u32);
573 u32 len_dw = (wqi_size / sizeof(u32)) - 1;
574 int i = 0, j;
575
576 if (wqi_size > wq_space_until_wrap(e)) {
577 if (wq_noop_append(e))
578 return;
579 }
580 if (wq_wait_for_space(e, wqi_size))
581 return;
582
583 wqi[i++] = FIELD_PREP(WQ_TYPE_MASK, WQ_TYPE_MULTI_LRC) |
584 FIELD_PREP(WQ_LEN_MASK, len_dw);
585 wqi[i++] = xe_lrc_descriptor(e->lrc);
586 wqi[i++] = FIELD_PREP(WQ_GUC_ID_MASK, e->guc->id) |
587 FIELD_PREP(WQ_RING_TAIL_MASK, e->lrc->ring.tail / sizeof(u64));
588 wqi[i++] = 0;
589 for (j = 1; j < e->width; ++j) {
590 struct xe_lrc *lrc = e->lrc + j;
591
592 wqi[i++] = lrc->ring.tail / sizeof(u64);
593 }
594
595 XE_BUG_ON(i != wqi_size / sizeof(u32));
596
597 iosys_map_incr(&map, offsetof(struct parallel_scratch,
598 wq[e->guc->wqi_tail / sizeof(u32)]));
599 xe_map_memcpy_to(xe, &map, 0, wqi, wqi_size);
600 e->guc->wqi_tail += wqi_size;
601 XE_BUG_ON(e->guc->wqi_tail > WQ_SIZE);
602
603 xe_device_wmb(xe);
604
605 map = xe_lrc_parallel_map(e->lrc);
606 parallel_write(xe, map, wq_desc.tail, e->guc->wqi_tail);
607}
608
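/*
 * Submit to the GuC: the first submission on a context that isn't enabled
 * also turns on GuC scheduling for it (SCHED_CONTEXT_MODE_SET) and marks
 * resume_time as RESUME_PENDING until the GuC acks; afterwards a plain
 * SCHED_CONTEXT is enough. Parallel engines send an extra SCHED_CONTEXT after
 * the enable, presumably so the freshly appended WQ item is picked up.
 */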
609#define RESUME_PENDING ~0x0ull
610static void submit_engine(struct xe_engine *e)
611{
612 struct xe_guc *guc = engine_to_guc(e);
613 struct xe_lrc *lrc = e->lrc;
614 u32 action[3];
615 u32 g2h_len = 0;
616 u32 num_g2h = 0;
617 int len = 0;
618 bool extra_submit = false;
619
620 XE_BUG_ON(!engine_registered(e));
621
622 if (xe_engine_is_parallel(e))
623 wq_item_append(e);
624 else
625 xe_lrc_write_ctx_reg(lrc, CTX_RING_TAIL, lrc->ring.tail);
626
627 if (engine_suspended(e) && !xe_engine_is_parallel(e))
628 return;
629
630 if (!engine_enabled(e) && !engine_suspended(e)) {
631 action[len++] = XE_GUC_ACTION_SCHED_CONTEXT_MODE_SET;
632 action[len++] = e->guc->id;
633 action[len++] = GUC_CONTEXT_ENABLE;
634 g2h_len = G2H_LEN_DW_SCHED_CONTEXT_MODE_SET;
635 num_g2h = 1;
636 if (xe_engine_is_parallel(e))
637 extra_submit = true;
638
639 e->guc->resume_time = RESUME_PENDING;
640 set_engine_pending_enable(e);
641 set_engine_enabled(e);
642 trace_xe_engine_scheduling_enable(e);
643 } else {
644 action[len++] = XE_GUC_ACTION_SCHED_CONTEXT;
645 action[len++] = e->guc->id;
646 trace_xe_engine_submit(e);
647 }
648
649 xe_guc_ct_send(&guc->ct, action, len, g2h_len, num_g2h);
650
651 if (extra_submit) {
652 len = 0;
653 action[len++] = XE_GUC_ACTION_SCHED_CONTEXT;
654 action[len++] = e->guc->id;
655 trace_xe_engine_submit(e);
656
657 xe_guc_ct_send(&guc->ct, action, len, 0, 0);
658 }
659}
660
661static struct dma_fence *
662guc_engine_run_job(struct drm_sched_job *drm_job)
663{
664 struct xe_sched_job *job = to_xe_sched_job(drm_job);
665 struct xe_engine *e = job->engine;
666
667 XE_BUG_ON((engine_destroyed(e) || engine_pending_disable(e)) &&
668 !engine_banned(e) && !engine_suspended(e));
669
670 trace_xe_sched_job_run(job);
671
672 if (!engine_killed_or_banned(e) && !xe_sched_job_is_error(job)) {
673 if (!engine_registered(e))
674 register_engine(e);
675 e->ring_ops->emit_job(job);
676 submit_engine(e);
677 }
678
679 if (test_and_set_bit(JOB_FLAG_SUBMIT, &job->fence->flags))
680 return job->fence;
681 else
682 return dma_fence_get(job->fence);
683}
684
685static void guc_engine_free_job(struct drm_sched_job *drm_job)
686{
687 struct xe_sched_job *job = to_xe_sched_job(drm_job);
688
689 trace_xe_sched_job_free(job);
690 xe_sched_job_put(job);
691}
692
693static int guc_read_stopped(struct xe_guc *guc)
694{
695 return atomic_read(&guc->submission_state.stopped);
696}
697
698#define MAKE_SCHED_CONTEXT_ACTION(e, enable_disable) \
699 u32 action[] = { \
700 XE_GUC_ACTION_SCHED_CONTEXT_MODE_SET, \
701 e->guc->id, \
702 GUC_CONTEXT_##enable_disable, \
703 }
704
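/*
 * Disable GuC scheduling for the context and then deregister it. The
 * preemption timeout is dropped to its minimum first so a long-running
 * workload can't stall the scheduling disable, and any in-flight pending
 * enable must complete (or a GuC stop occur) before the disable is sent.
 */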
705static void disable_scheduling_deregister(struct xe_guc *guc,
706 struct xe_engine *e)
707{
708 MAKE_SCHED_CONTEXT_ACTION(e, DISABLE);
709 int ret;
710
711 set_min_preemption_timeout(guc, e);
712 smp_rmb();
713 ret = wait_event_timeout(guc->ct.wq, !engine_pending_enable(e) ||
714 guc_read_stopped(guc), HZ * 5);
715 if (!ret) {
716 struct xe_gpu_scheduler *sched = &e->guc->sched;
717
718 XE_WARN_ON("Pending enable failed to respond");
719 xe_sched_submission_start(sched);
720 xe_gt_reset_async(e->gt);
721 xe_sched_tdr_queue_imm(sched);
722 return;
723 }
724
725 clear_engine_enabled(e);
726 set_engine_pending_disable(e);
727 set_engine_destroyed(e);
728 trace_xe_engine_scheduling_disable(e);
729
730 /*
 731 * Reserve space for both G2Hs here as the 2nd G2H is sent from a G2H
 732 * handler and we are not allowed to reserve G2H space in handlers.
733 */
734 xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action),
735 G2H_LEN_DW_SCHED_CONTEXT_MODE_SET +
736 G2H_LEN_DW_DEREGISTER_CONTEXT, 2);
737}
738
739static void guc_engine_print(struct xe_engine *e, struct drm_printer *p);
740
741#if IS_ENABLED(CONFIG_DRM_XE_SIMPLE_ERROR_CAPTURE)
742static void simple_error_capture(struct xe_engine *e)
743{
744 struct xe_guc *guc = engine_to_guc(e);
745 struct drm_printer p = drm_err_printer("");
746 struct xe_hw_engine *hwe;
747 enum xe_hw_engine_id id;
748 u32 adj_logical_mask = e->logical_mask;
749 u32 width_mask = (0x1 << e->width) - 1;
750 int i;
751 bool cookie;
752
753 if (e->vm && !e->vm->error_capture.capture_once) {
754 e->vm->error_capture.capture_once = true;
755 cookie = dma_fence_begin_signalling();
756 for (i = 0; e->width > 1 && i < XE_HW_ENGINE_MAX_INSTANCE;) {
757 if (adj_logical_mask & BIT(i)) {
758 adj_logical_mask |= width_mask << i;
759 i += e->width;
760 } else {
761 ++i;
762 }
763 }
764
765 xe_force_wake_get(gt_to_fw(guc_to_gt(guc)), XE_FORCEWAKE_ALL);
766 xe_guc_ct_print(&guc->ct, &p);
767 guc_engine_print(e, &p);
768 for_each_hw_engine(hwe, guc_to_gt(guc), id) {
769 if (hwe->class != e->hwe->class ||
770 !(BIT(hwe->logical_instance) & adj_logical_mask))
771 continue;
772 xe_hw_engine_print_state(hwe, &p);
773 }
774 xe_analyze_vm(&p, e->vm, e->gt->info.id);
775 xe_force_wake_put(gt_to_fw(guc_to_gt(guc)), XE_FORCEWAKE_ALL);
776 dma_fence_end_signalling(cookie);
777 }
778}
779#else
780static void simple_error_capture(struct xe_engine *e)
781{
782}
783#endif
784
785static enum drm_gpu_sched_stat
786guc_engine_timedout_job(struct drm_sched_job *drm_job)
787{
788 struct xe_sched_job *job = to_xe_sched_job(drm_job);
789 struct xe_sched_job *tmp_job;
790 struct xe_engine *e = job->engine;
791 struct xe_gpu_scheduler *sched = &e->guc->sched;
792 struct xe_device *xe = guc_to_xe(engine_to_guc(e));
793 int err = -ETIME;
794 int i = 0;
795
796 if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &job->fence->flags)) {
797 XE_WARN_ON(e->flags & ENGINE_FLAG_KERNEL);
798 XE_WARN_ON(e->flags & ENGINE_FLAG_VM && !engine_killed(e));
799
800 drm_notice(&xe->drm, "Timedout job: seqno=%u, guc_id=%d, flags=0x%lx",
801 xe_sched_job_seqno(job), e->guc->id, e->flags);
802 simple_error_capture(e);
803 } else {
804 drm_dbg(&xe->drm, "Timedout signaled job: seqno=%u, guc_id=%d, flags=0x%lx",
805 xe_sched_job_seqno(job), e->guc->id, e->flags);
806 }
807 trace_xe_sched_job_timedout(job);
808
809 /* Kill the run_job entry point */
810 xe_sched_submission_stop(sched);
811
812 /*
 813 * Kernel jobs should never fail, nor should VM jobs; if they do,
 814 * something has gone wrong and the GT needs a reset.
815 */
816 if (e->flags & ENGINE_FLAG_KERNEL ||
817 (e->flags & ENGINE_FLAG_VM && !engine_killed(e))) {
818 if (!xe_sched_invalidate_job(job, 2)) {
819 xe_sched_add_pending_job(sched, job);
820 xe_sched_submission_start(sched);
821 xe_gt_reset_async(e->gt);
822 goto out;
823 }
824 }
825
826 /* Engine state now stable, disable scheduling if needed */
827 if (engine_enabled(e)) {
828 struct xe_guc *guc = engine_to_guc(e);
829 int ret;
830
831 if (engine_reset(e))
832 err = -EIO;
833 set_engine_banned(e);
834 xe_engine_get(e);
835 disable_scheduling_deregister(engine_to_guc(e), e);
836
837 /*
 838 * Must wait for scheduling to be disabled before signalling
 839 * any fences; if the GT is broken, the GT reset code should signal us.
840 *
841 * FIXME: Tests can generate a ton of 0x6000 (IOMMU CAT fault
842 * error) messages which can cause the schedule disable to get
843 * lost. If this occurs, trigger a GT reset to recover.
844 */
845 smp_rmb();
846 ret = wait_event_timeout(guc->ct.wq,
847 !engine_pending_disable(e) ||
848 guc_read_stopped(guc), HZ * 5);
849 if (!ret) {
850 XE_WARN_ON("Schedule disable failed to respond");
851 xe_sched_add_pending_job(sched, job);
852 xe_sched_submission_start(sched);
853 xe_gt_reset_async(e->gt);
854 xe_sched_tdr_queue_imm(sched);
855 goto out;
856 }
857 }
858
859 /* Stop fence signaling */
860 xe_hw_fence_irq_stop(e->fence_irq);
861
862 /*
863 * Fence state now stable, stop / start scheduler which cleans up any
864 * fences that are complete
865 */
866 xe_sched_add_pending_job(sched, job);
867 xe_sched_submission_start(sched);
868 xe_sched_tdr_queue_imm(&e->guc->sched);
869
870 /* Mark all outstanding jobs as bad, thus completing them */
871 spin_lock(&sched->base.job_list_lock);
872 list_for_each_entry(tmp_job, &sched->base.pending_list, drm.list)
873 xe_sched_job_set_error(tmp_job, !i++ ? err : -ECANCELED);
874 spin_unlock(&sched->base.job_list_lock);
875
876 /* Start fence signaling */
877 xe_hw_fence_irq_start(e->fence_irq);
878
879out:
880 return DRM_GPU_SCHED_STAT_NOMINAL;
881}
882
883static void __guc_engine_fini_async(struct work_struct *w)
884{
885 struct xe_guc_engine *ge =
886 container_of(w, struct xe_guc_engine, fini_async);
887 struct xe_engine *e = ge->engine;
888 struct xe_guc *guc = engine_to_guc(e);
889
890 trace_xe_engine_destroy(e);
891
892 if (e->flags & ENGINE_FLAG_PERSISTENT)
893 xe_device_remove_persitent_engines(gt_to_xe(e->gt), e);
894 release_guc_id(guc, e);
895 xe_sched_entity_fini(&ge->entity);
896 xe_sched_fini(&ge->sched);
897
898 if (!(e->flags & ENGINE_FLAG_KERNEL)) {
899 kfree(ge);
900 xe_engine_fini(e);
901 }
902}
903
904static void guc_engine_fini_async(struct xe_engine *e)
905{
906 bool kernel = e->flags & ENGINE_FLAG_KERNEL;
907
908 INIT_WORK(&e->guc->fini_async, __guc_engine_fini_async);
909 queue_work(system_unbound_wq, &e->guc->fini_async);
910
911 /* We must block on kernel engines so slabs are empty on driver unload */
912 if (kernel) {
913 struct xe_guc_engine *ge = e->guc;
914
915 flush_work(&ge->fini_async);
916 kfree(ge);
917 xe_engine_fini(e);
918 }
919}
920
921static void __guc_engine_fini(struct xe_guc *guc, struct xe_engine *e)
922{
923 /*
 924 * Might be done from within the GPU scheduler, so this needs to be async
 925 * as we fini the scheduler when the engine is fini'd and the scheduler
 926 * can't complete its own fini from within itself (circular dependency).
 927 * Async resolves this, and we don't really care when everything is
 928 * fini'd, just that it is.
929 */
930 guc_engine_fini_async(e);
931}
932
933static void __guc_engine_process_msg_cleanup(struct xe_sched_msg *msg)
934{
935 struct xe_engine *e = msg->private_data;
936 struct xe_guc *guc = engine_to_guc(e);
937
938 XE_BUG_ON(e->flags & ENGINE_FLAG_KERNEL);
939 trace_xe_engine_cleanup_entity(e);
940
941 if (engine_registered(e))
942 disable_scheduling_deregister(guc, e);
943 else
944 __guc_engine_fini(guc, e);
945}
946
947static bool guc_engine_allowed_to_change_state(struct xe_engine *e)
948{
949 return !engine_killed_or_banned(e) && engine_registered(e);
950}
951
952static void __guc_engine_process_msg_set_sched_props(struct xe_sched_msg *msg)
953{
954 struct xe_engine *e = msg->private_data;
955 struct xe_guc *guc = engine_to_guc(e);
956
957 if (guc_engine_allowed_to_change_state(e))
958 init_policies(guc, e);
959 kfree(msg);
960}
961
962static void suspend_fence_signal(struct xe_engine *e)
963{
964 struct xe_guc *guc = engine_to_guc(e);
965
966 XE_BUG_ON(!engine_suspended(e) && !engine_killed(e) &&
967 !guc_read_stopped(guc));
968 XE_BUG_ON(!e->guc->suspend_pending);
969
970 e->guc->suspend_pending = false;
971 smp_wmb();
972 wake_up(&e->guc->suspend_wait);
973}
974
975static void __guc_engine_process_msg_suspend(struct xe_sched_msg *msg)
976{
977 struct xe_engine *e = msg->private_data;
978 struct xe_guc *guc = engine_to_guc(e);
979
980 if (guc_engine_allowed_to_change_state(e) && !engine_suspended(e) &&
981 engine_enabled(e)) {
982 wait_event(guc->ct.wq, e->guc->resume_time != RESUME_PENDING ||
983 guc_read_stopped(guc));
984
985 if (!guc_read_stopped(guc)) {
986 MAKE_SCHED_CONTEXT_ACTION(e, DISABLE);
987 s64 since_resume_ms =
988 ktime_ms_delta(ktime_get(),
989 e->guc->resume_time);
990 s64 wait_ms = e->vm->preempt.min_run_period_ms -
991 since_resume_ms;
992
993 if (wait_ms > 0 && e->guc->resume_time)
994 msleep(wait_ms);
995
996 set_engine_suspended(e);
997 clear_engine_enabled(e);
998 set_engine_pending_disable(e);
999 trace_xe_engine_scheduling_disable(e);
1000
1001 xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action),
1002 G2H_LEN_DW_SCHED_CONTEXT_MODE_SET, 1);
1003 }
1004 } else if (e->guc->suspend_pending) {
1005 set_engine_suspended(e);
1006 suspend_fence_signal(e);
1007 }
1008}
1009
1010static void __guc_engine_process_msg_resume(struct xe_sched_msg *msg)
1011{
1012 struct xe_engine *e = msg->private_data;
1013 struct xe_guc *guc = engine_to_guc(e);
1014
1015 if (guc_engine_allowed_to_change_state(e)) {
1016 MAKE_SCHED_CONTEXT_ACTION(e, ENABLE);
1017
1018 e->guc->resume_time = RESUME_PENDING;
1019 clear_engine_suspended(e);
1020 set_engine_pending_enable(e);
1021 set_engine_enabled(e);
1022 trace_xe_engine_scheduling_enable(e);
1023
1024 xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action),
1025 G2H_LEN_DW_SCHED_CONTEXT_MODE_SET, 1);
1026 } else {
1027 clear_engine_suspended(e);
1028 }
1029}
1030
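/*
 * Scheduler message opcodes handled by guc_engine_process_msg(). CLEANUP,
 * SUSPEND and RESUME use the per-engine static messages; SET_SCHED_PROPS
 * messages are allocated per request and freed by their handler.
 */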
1031#define CLEANUP 1 /* Non-zero values to catch uninitialized msg */
1032#define SET_SCHED_PROPS 2
1033#define SUSPEND 3
1034#define RESUME 4
1035
1036static void guc_engine_process_msg(struct xe_sched_msg *msg)
1037{
1038 trace_xe_sched_msg_recv(msg);
1039
1040 switch (msg->opcode) {
1041 case CLEANUP:
1042 __guc_engine_process_msg_cleanup(msg);
1043 break;
1044 case SET_SCHED_PROPS:
1045 __guc_engine_process_msg_set_sched_props(msg);
1046 break;
1047 case SUSPEND:
1048 __guc_engine_process_msg_suspend(msg);
1049 break;
1050 case RESUME:
1051 __guc_engine_process_msg_resume(msg);
1052 break;
1053 default:
1054 XE_BUG_ON("Unknown message type");
1055 }
1056}
1057
1058static const struct drm_sched_backend_ops drm_sched_ops = {
1059 .run_job = guc_engine_run_job,
1060 .free_job = guc_engine_free_job,
1061 .timedout_job = guc_engine_timedout_job,
1062};
1063
1064static const struct xe_sched_backend_ops xe_sched_ops = {
1065 .process_msg = guc_engine_process_msg,
1066};
1067
1068static int guc_engine_init(struct xe_engine *e)
1069{
1070 struct xe_gpu_scheduler *sched;
1071 struct xe_guc *guc = engine_to_guc(e);
1072 struct xe_guc_engine *ge;
1073 long timeout;
1074 int err;
1075
1076 XE_BUG_ON(!xe_device_guc_submission_enabled(guc_to_xe(guc)));
1077
1078 ge = kzalloc(sizeof(*ge), GFP_KERNEL);
1079 if (!ge)
1080 return -ENOMEM;
1081
1082 e->guc = ge;
1083 ge->engine = e;
1084 init_waitqueue_head(&ge->suspend_wait);
1085
1086 timeout = xe_vm_no_dma_fences(e->vm) ? MAX_SCHEDULE_TIMEOUT : HZ * 5;
1087 err = xe_sched_init(&ge->sched, &drm_sched_ops, &xe_sched_ops, NULL,
1088 e->lrc[0].ring.size / MAX_JOB_SIZE_BYTES,
1089 64, timeout, guc_to_gt(guc)->ordered_wq, NULL,
1090 e->name, gt_to_xe(e->gt)->drm.dev);
1091 if (err)
1092 goto err_free;
1093
1094 sched = &ge->sched;
1095 err = xe_sched_entity_init(&ge->entity, sched);
1096 if (err)
1097 goto err_sched;
1098 e->priority = XE_ENGINE_PRIORITY_NORMAL;
1099
1100 mutex_lock(&guc->submission_state.lock);
1101
1102 err = alloc_guc_id(guc, e);
1103 if (err)
1104 goto err_entity;
1105
1106 e->entity = &ge->entity;
1107
1108 if (guc_read_stopped(guc))
1109 xe_sched_stop(sched);
1110
1111 mutex_unlock(&guc->submission_state.lock);
1112
1113 switch (e->class) {
1114 case XE_ENGINE_CLASS_RENDER:
1115 sprintf(e->name, "rcs%d", e->guc->id);
1116 break;
1117 case XE_ENGINE_CLASS_VIDEO_DECODE:
1118 sprintf(e->name, "vcs%d", e->guc->id);
1119 break;
1120 case XE_ENGINE_CLASS_VIDEO_ENHANCE:
1121 sprintf(e->name, "vecs%d", e->guc->id);
1122 break;
1123 case XE_ENGINE_CLASS_COPY:
1124 sprintf(e->name, "bcs%d", e->guc->id);
1125 break;
1126 case XE_ENGINE_CLASS_COMPUTE:
1127 sprintf(e->name, "ccs%d", e->guc->id);
1128 break;
1129 default:
1130 XE_WARN_ON(e->class);
1131 }
1132
1133 trace_xe_engine_create(e);
1134
1135 return 0;
1136
1137err_entity:
1138 xe_sched_entity_fini(&ge->entity);
1139err_sched:
1140 xe_sched_fini(&ge->sched);
1141err_free:
1142 kfree(ge);
1143
1144 return err;
1145}
1146
1147static void guc_engine_kill(struct xe_engine *e)
1148{
1149 trace_xe_engine_kill(e);
1150 set_engine_killed(e);
1151 xe_sched_tdr_queue_imm(&e->guc->sched);
1152}
1153
1154static void guc_engine_add_msg(struct xe_engine *e, struct xe_sched_msg *msg,
1155 u32 opcode)
1156{
1157 INIT_LIST_HEAD(&msg->link);
1158 msg->opcode = opcode;
1159 msg->private_data = e;
1160
1161 trace_xe_sched_msg_add(msg);
1162 xe_sched_add_msg(&e->guc->sched, msg);
1163}
1164
1165#define STATIC_MSG_CLEANUP 0
1166#define STATIC_MSG_SUSPEND 1
1167#define STATIC_MSG_RESUME 2
1168static void guc_engine_fini(struct xe_engine *e)
1169{
1170 struct xe_sched_msg *msg = e->guc->static_msgs + STATIC_MSG_CLEANUP;
1171
1172 if (!(e->flags & ENGINE_FLAG_KERNEL))
1173 guc_engine_add_msg(e, msg, CLEANUP);
1174 else
1175 __guc_engine_fini(engine_to_guc(e), e);
1176}
1177
1178static int guc_engine_set_priority(struct xe_engine *e,
1179 enum xe_engine_priority priority)
1180{
1181 struct xe_sched_msg *msg;
1182
1183 if (e->priority == priority || engine_killed_or_banned(e))
1184 return 0;
1185
1186 msg = kmalloc(sizeof(*msg), GFP_KERNEL);
1187 if (!msg)
1188 return -ENOMEM;
1189
1190 guc_engine_add_msg(e, msg, SET_SCHED_PROPS);
1191 e->priority = priority;
1192
1193 return 0;
1194}
1195
1196static int guc_engine_set_timeslice(struct xe_engine *e, u32 timeslice_us)
1197{
1198 struct xe_sched_msg *msg;
1199
1200 if (e->sched_props.timeslice_us == timeslice_us ||
1201 engine_killed_or_banned(e))
1202 return 0;
1203
1204 msg = kmalloc(sizeof(*msg), GFP_KERNEL);
1205 if (!msg)
1206 return -ENOMEM;
1207
1208 e->sched_props.timeslice_us = timeslice_us;
1209 guc_engine_add_msg(e, msg, SET_SCHED_PROPS);
1210
1211 return 0;
1212}
1213
1214static int guc_engine_set_preempt_timeout(struct xe_engine *e,
1215 u32 preempt_timeout_us)
1216{
1217 struct xe_sched_msg *msg;
1218
1219 if (e->sched_props.preempt_timeout_us == preempt_timeout_us ||
1220 engine_killed_or_banned(e))
1221 return 0;
1222
1223 msg = kmalloc(sizeof(*msg), GFP_KERNEL);
1224 if (!msg)
1225 return -ENOMEM;
1226
1227 e->sched_props.preempt_timeout_us = preempt_timeout_us;
1228 guc_engine_add_msg(e, msg, SET_SCHED_PROPS);
1229
1230 return 0;
1231}
1232
1233static int guc_engine_set_job_timeout(struct xe_engine *e, u32 job_timeout_ms)
1234{
1235 struct xe_gpu_scheduler *sched = &e->guc->sched;
1236
1237 XE_BUG_ON(engine_registered(e));
1238 XE_BUG_ON(engine_banned(e));
1239 XE_BUG_ON(engine_killed(e));
1240
1241 sched->base.timeout = job_timeout_ms;
1242
1243 return 0;
1244}
1245
1246static int guc_engine_suspend(struct xe_engine *e)
1247{
1248 struct xe_sched_msg *msg = e->guc->static_msgs + STATIC_MSG_SUSPEND;
1249
1250 if (engine_killed_or_banned(e) || e->guc->suspend_pending)
1251 return -EINVAL;
1252
1253 e->guc->suspend_pending = true;
1254 guc_engine_add_msg(e, msg, SUSPEND);
1255
1256 return 0;
1257}
1258
1259static void guc_engine_suspend_wait(struct xe_engine *e)
1260{
1261 struct xe_guc *guc = engine_to_guc(e);
1262
1263 wait_event(e->guc->suspend_wait, !e->guc->suspend_pending ||
1264 guc_read_stopped(guc));
1265}
1266
1267static void guc_engine_resume(struct xe_engine *e)
1268{
1269 struct xe_sched_msg *msg = e->guc->static_msgs + STATIC_MSG_RESUME;
1270
1271 XE_BUG_ON(e->guc->suspend_pending);
1272
1273 xe_mocs_init_engine(e);
1274 guc_engine_add_msg(e, msg, RESUME);
1275}
1276
1277/*
 1278 * All of these functions are an abstraction layer which other parts of XE can
 1279 * use to trap into the GuC backend. Aside from init, none of them should do
 1280 * much more than trap into the DRM scheduler, which synchronizes these
 1281 * operations.
1282 */
1283static const struct xe_engine_ops guc_engine_ops = {
1284 .init = guc_engine_init,
1285 .kill = guc_engine_kill,
1286 .fini = guc_engine_fini,
1287 .set_priority = guc_engine_set_priority,
1288 .set_timeslice = guc_engine_set_timeslice,
1289 .set_preempt_timeout = guc_engine_set_preempt_timeout,
1290 .set_job_timeout = guc_engine_set_job_timeout,
1291 .suspend = guc_engine_suspend,
1292 .suspend_wait = guc_engine_suspend_wait,
1293 .resume = guc_engine_resume,
1294};
1295
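/*
 * Called with GuC submission stopped (reset in progress): park the DRM
 * scheduler, clean up contexts whose G2H responses were lost, and keep only
 * the DESTROYED and SUSPENDED bits of the engine state across the reset.
 */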
1296static void guc_engine_stop(struct xe_guc *guc, struct xe_engine *e)
1297{
1298 struct xe_gpu_scheduler *sched = &e->guc->sched;
1299
1300 /* Stop scheduling + flush any DRM scheduler operations */
1301 xe_sched_submission_stop(sched);
1302
1303 /* Clean up lost G2H + reset engine state */
1304 if (engine_destroyed(e) && engine_registered(e)) {
1305 if (engine_banned(e))
1306 xe_engine_put(e);
1307 else
1308 __guc_engine_fini(guc, e);
1309 }
1310 if (e->guc->suspend_pending) {
1311 set_engine_suspended(e);
1312 suspend_fence_signal(e);
1313 }
1314 atomic_and(ENGINE_STATE_DESTROYED | ENGINE_STATE_SUSPENDED,
1315 &e->guc->state);
1316 e->guc->resume_time = 0;
1317 trace_xe_engine_stop(e);
1318
1319 /*
1320 * Ban any engine (aside from kernel and engines used for VM ops) with a
1321 * started but not complete job or if a job has gone through a GT reset
1322 * more than twice.
1323 */
1324 if (!(e->flags & (ENGINE_FLAG_KERNEL | ENGINE_FLAG_VM))) {
1325 struct xe_sched_job *job = xe_sched_first_pending_job(sched);
1326
1327 if (job) {
1328 if ((xe_sched_job_started(job) &&
1329 !xe_sched_job_completed(job)) ||
1330 xe_sched_invalidate_job(job, 2)) {
1331 trace_xe_sched_job_ban(job);
1332 xe_sched_tdr_queue_imm(&e->guc->sched);
1333 set_engine_banned(e);
1334 }
1335 }
1336 }
1337}
1338
1339int xe_guc_submit_reset_prepare(struct xe_guc *guc)
1340{
1341 int ret;
1342
1343 /*
1344 * Using an atomic here rather than submission_state.lock as this
1345 * function can be called while holding the CT lock (engine reset
1346 * failure). submission_state.lock needs the CT lock to resubmit jobs.
 1347 * An atomic is not ideal, but it works to protect against a concurrent
 1348 * reset and to release any TDRs waiting on guc->submission_state.stopped.
1349 */
1350 ret = atomic_fetch_or(1, &guc->submission_state.stopped);
1351 smp_wmb();
1352 wake_up_all(&guc->ct.wq);
1353
1354 return ret;
1355}
1356
1357void xe_guc_submit_reset_wait(struct xe_guc *guc)
1358{
1359 wait_event(guc->ct.wq, !guc_read_stopped(guc));
1360}
1361
1362int xe_guc_submit_stop(struct xe_guc *guc)
1363{
1364 struct xe_engine *e;
1365 unsigned long index;
1366
1367 XE_BUG_ON(guc_read_stopped(guc) != 1);
1368
1369 mutex_lock(&guc->submission_state.lock);
1370
1371 xa_for_each(&guc->submission_state.engine_lookup, index, e)
1372 guc_engine_stop(guc, e);
1373
1374 mutex_unlock(&guc->submission_state.lock);
1375
1376 /*
1377 * No one can enter the backend at this point, aside from new engine
1378 * creation which is protected by guc->submission_state.lock.
1379 */
1380
1381 return 0;
1382}
1383
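/*
 * Resume a context after a GuC reset: make each ring appear empty again
 * (head = tail) so the DRM scheduler can re-emit and resubmit any pending
 * jobs, then restart submission.
 */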
1384static void guc_engine_start(struct xe_engine *e)
1385{
1386 struct xe_gpu_scheduler *sched = &e->guc->sched;
1387
1388 if (!engine_killed_or_banned(e)) {
1389 int i;
1390
1391 trace_xe_engine_resubmit(e);
1392 for (i = 0; i < e->width; ++i)
1393 xe_lrc_set_ring_head(e->lrc + i, e->lrc[i].ring.tail);
1394 xe_sched_resubmit_jobs(sched);
1395 }
1396
1397 xe_sched_submission_start(sched);
1398}
1399
1400int xe_guc_submit_start(struct xe_guc *guc)
1401{
1402 struct xe_engine *e;
1403 unsigned long index;
1404
1405 XE_BUG_ON(guc_read_stopped(guc) != 1);
1406
1407 mutex_lock(&guc->submission_state.lock);
1408 atomic_dec(&guc->submission_state.stopped);
1409 xa_for_each(&guc->submission_state.engine_lookup, index, e)
1410 guc_engine_start(e);
1411 mutex_unlock(&guc->submission_state.lock);
1412
1413 wake_up_all(&guc->ct.wq);
1414
1415 return 0;
1416}
1417
1418static struct xe_engine *
1419g2h_engine_lookup(struct xe_guc *guc, u32 guc_id)
1420{
1421 struct xe_device *xe = guc_to_xe(guc);
1422 struct xe_engine *e;
1423
1424 if (unlikely(guc_id >= GUC_ID_MAX)) {
1425 drm_err(&xe->drm, "Invalid guc_id %u", guc_id);
1426 return NULL;
1427 }
1428
1429 e = xa_load(&guc->submission_state.engine_lookup, guc_id);
1430 if (unlikely(!e)) {
 1431 drm_err(&xe->drm, "No engine present for guc_id %u", guc_id);
1432 return NULL;
1433 }
1434
1435 XE_BUG_ON(e->guc->id != guc_id);
1436
1437 return e;
1438}
1439
1440static void deregister_engine(struct xe_guc *guc, struct xe_engine *e)
1441{
1442 u32 action[] = {
1443 XE_GUC_ACTION_DEREGISTER_CONTEXT,
1444 e->guc->id,
1445 };
1446
1447 trace_xe_engine_deregister(e);
1448
1449 xe_guc_ct_send_g2h_handler(&guc->ct, action, ARRAY_SIZE(action));
1450}
1451
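/*
 * G2H handler: the GuC has acknowledged a scheduling enable or disable. A
 * disable ack either signals a pending suspend or continues teardown by
 * deregistering the context.
 */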
1452int xe_guc_sched_done_handler(struct xe_guc *guc, u32 *msg, u32 len)
1453{
1454 struct xe_device *xe = guc_to_xe(guc);
1455 struct xe_engine *e;
1456 u32 guc_id = msg[0];
1457
1458 if (unlikely(len < 2)) {
1459 drm_err(&xe->drm, "Invalid length %u", len);
1460 return -EPROTO;
1461 }
1462
1463 e = g2h_engine_lookup(guc, guc_id);
1464 if (unlikely(!e))
1465 return -EPROTO;
1466
1467 if (unlikely(!engine_pending_enable(e) &&
1468 !engine_pending_disable(e))) {
1469 drm_err(&xe->drm, "Unexpected engine state 0x%04x",
1470 atomic_read(&e->guc->state));
1471 return -EPROTO;
1472 }
1473
1474 trace_xe_engine_scheduling_done(e);
1475
1476 if (engine_pending_enable(e)) {
1477 e->guc->resume_time = ktime_get();
1478 clear_engine_pending_enable(e);
1479 smp_wmb();
1480 wake_up_all(&guc->ct.wq);
1481 } else {
1482 clear_engine_pending_disable(e);
1483 if (e->guc->suspend_pending) {
1484 suspend_fence_signal(e);
1485 } else {
1486 if (engine_banned(e)) {
1487 smp_wmb();
1488 wake_up_all(&guc->ct.wq);
1489 }
1490 deregister_engine(guc, e);
1491 }
1492 }
1493
1494 return 0;
1495}
1496
1497int xe_guc_deregister_done_handler(struct xe_guc *guc, u32 *msg, u32 len)
1498{
1499 struct xe_device *xe = guc_to_xe(guc);
1500 struct xe_engine *e;
1501 u32 guc_id = msg[0];
1502
1503 if (unlikely(len < 1)) {
1504 drm_err(&xe->drm, "Invalid length %u", len);
1505 return -EPROTO;
1506 }
1507
1508 e = g2h_engine_lookup(guc, guc_id);
1509 if (unlikely(!e))
1510 return -EPROTO;
1511
1512 if (!engine_destroyed(e) || engine_pending_disable(e) ||
1513 engine_pending_enable(e) || engine_enabled(e)) {
1514 drm_err(&xe->drm, "Unexpected engine state 0x%04x",
1515 atomic_read(&e->guc->state));
1516 return -EPROTO;
1517 }
1518
1519 trace_xe_engine_deregister_done(e);
1520
1521 clear_engine_registered(e);
1522 if (engine_banned(e))
1523 xe_engine_put(e);
1524 else
1525 __guc_engine_fini(guc, e);
1526
1527 return 0;
1528}
1529
1530int xe_guc_engine_reset_handler(struct xe_guc *guc, u32 *msg, u32 len)
1531{
1532 struct xe_device *xe = guc_to_xe(guc);
1533 struct xe_engine *e;
1534 u32 guc_id = msg[0];
1535
1536 if (unlikely(len < 1)) {
1537 drm_err(&xe->drm, "Invalid length %u", len);
1538 return -EPROTO;
1539 }
1540
1541 e = g2h_engine_lookup(guc, guc_id);
1542 if (unlikely(!e))
1543 return -EPROTO;
1544
1545 drm_info(&xe->drm, "Engine reset: guc_id=%d", guc_id);
1546
1547 /* FIXME: Do error capture, most likely async */
1548
1549 trace_xe_engine_reset(e);
1550
1551 /*
1552 * A banned engine is a NOP at this point (came from
 1553 * guc_engine_timedout_job). Otherwise, kick the DRM scheduler to cancel
 1554 * the jobs by queueing the TDR immediately, which kicks
 1555 * guc_engine_timedout_job.
1556 */
1557 set_engine_reset(e);
1558 if (!engine_banned(e))
1559 xe_sched_tdr_queue_imm(&e->guc->sched);
1560
1561 return 0;
1562}
1563
1564int xe_guc_engine_memory_cat_error_handler(struct xe_guc *guc, u32 *msg,
1565 u32 len)
1566{
1567 struct xe_device *xe = guc_to_xe(guc);
1568 struct xe_engine *e;
1569 u32 guc_id = msg[0];
1570
1571 if (unlikely(len < 1)) {
1572 drm_err(&xe->drm, "Invalid length %u", len);
1573 return -EPROTO;
1574 }
1575
1576 e = g2h_engine_lookup(guc, guc_id);
1577 if (unlikely(!e))
1578 return -EPROTO;
1579
1580 drm_warn(&xe->drm, "Engine memory cat error: guc_id=%d", guc_id);
1581 trace_xe_engine_memory_cat_error(e);
1582
1583 /* Treat the same as engine reset */
1584 set_engine_reset(e);
1585 if (!engine_banned(e))
1586 xe_sched_tdr_queue_imm(&e->guc->sched);
1587
1588 return 0;
1589}
1590
1591int xe_guc_engine_reset_failure_handler(struct xe_guc *guc, u32 *msg, u32 len)
1592{
1593 struct xe_device *xe = guc_to_xe(guc);
1594 u8 guc_class, instance;
1595 u32 reason;
1596
1597 if (unlikely(len != 3)) {
1598 drm_err(&xe->drm, "Invalid length %u", len);
1599 return -EPROTO;
1600 }
1601
1602 guc_class = msg[0];
1603 instance = msg[1];
1604 reason = msg[2];
1605
1606 /* Unexpected failure of a hardware feature, log an actual error */
1607 drm_err(&xe->drm, "GuC engine reset request failed on %d:%d because 0x%08X",
1608 guc_class, instance, reason);
1609
1610 xe_gt_reset_async(guc_to_gt(guc));
1611
1612 return 0;
1613}
1614
1615static void guc_engine_wq_print(struct xe_engine *e, struct drm_printer *p)
1616{
1617 struct xe_guc *guc = engine_to_guc(e);
1618 struct xe_device *xe = guc_to_xe(guc);
1619 struct iosys_map map = xe_lrc_parallel_map(e->lrc);
1620 int i;
1621
1622 drm_printf(p, "\tWQ head: %u (internal), %d (memory)\n",
1623 e->guc->wqi_head, parallel_read(xe, map, wq_desc.head));
1624 drm_printf(p, "\tWQ tail: %u (internal), %d (memory)\n",
1625 e->guc->wqi_tail, parallel_read(xe, map, wq_desc.tail));
1626 drm_printf(p, "\tWQ status: %u\n",
1627 parallel_read(xe, map, wq_desc.wq_status));
1628 if (parallel_read(xe, map, wq_desc.head) !=
1629 parallel_read(xe, map, wq_desc.tail)) {
1630 for (i = parallel_read(xe, map, wq_desc.head);
1631 i != parallel_read(xe, map, wq_desc.tail);
1632 i = (i + sizeof(u32)) % WQ_SIZE)
 1633 drm_printf(p, "\tWQ[%zu]: 0x%08x\n", i / sizeof(u32),
 1634 parallel_read(xe, map, wq[i / sizeof(u32)]));
1635 }
1636}
1637
1638static void guc_engine_print(struct xe_engine *e, struct drm_printer *p)
1639{
1640 struct xe_gpu_scheduler *sched = &e->guc->sched;
1641 struct xe_sched_job *job;
1642 int i;
1643
1644 drm_printf(p, "\nGuC ID: %d\n", e->guc->id);
1645 drm_printf(p, "\tName: %s\n", e->name);
1646 drm_printf(p, "\tClass: %d\n", e->class);
1647 drm_printf(p, "\tLogical mask: 0x%x\n", e->logical_mask);
1648 drm_printf(p, "\tWidth: %d\n", e->width);
1649 drm_printf(p, "\tRef: %d\n", kref_read(&e->refcount));
1650 drm_printf(p, "\tTimeout: %ld (ms)\n", sched->base.timeout);
1651 drm_printf(p, "\tTimeslice: %u (us)\n", e->sched_props.timeslice_us);
1652 drm_printf(p, "\tPreempt timeout: %u (us)\n",
1653 e->sched_props.preempt_timeout_us);
 1654 for (i = 0; i < e->width; ++i) {
1655 struct xe_lrc *lrc = e->lrc + i;
1656
1657 drm_printf(p, "\tHW Context Desc: 0x%08x\n",
1658 lower_32_bits(xe_lrc_ggtt_addr(lrc)));
1659 drm_printf(p, "\tLRC Head: (memory) %u\n",
1660 xe_lrc_ring_head(lrc));
1661 drm_printf(p, "\tLRC Tail: (internal) %u, (memory) %u\n",
1662 lrc->ring.tail,
1663 xe_lrc_read_ctx_reg(lrc, CTX_RING_TAIL));
1664 drm_printf(p, "\tStart seqno: (memory) %d\n",
1665 xe_lrc_start_seqno(lrc));
1666 drm_printf(p, "\tSeqno: (memory) %d\n", xe_lrc_seqno(lrc));
1667 }
1668 drm_printf(p, "\tSchedule State: 0x%x\n", atomic_read(&e->guc->state));
1669 drm_printf(p, "\tFlags: 0x%lx\n", e->flags);
1670 if (xe_engine_is_parallel(e))
1671 guc_engine_wq_print(e, p);
1672
1673 spin_lock(&sched->base.job_list_lock);
1674 list_for_each_entry(job, &sched->base.pending_list, drm.list)
1675 drm_printf(p, "\tJob: seqno=%d, fence=%d, finished=%d\n",
1676 xe_sched_job_seqno(job),
1677 dma_fence_is_signaled(job->fence) ? 1 : 0,
1678 dma_fence_is_signaled(&job->drm.s_fence->finished) ?
1679 1 : 0);
1680 spin_unlock(&sched->base.job_list_lock);
1681}
1682
1683void xe_guc_submit_print(struct xe_guc *guc, struct drm_printer *p)
1684{
1685 struct xe_engine *e;
1686 unsigned long index;
1687
1688 if (!xe_device_guc_submission_enabled(guc_to_xe(guc)))
1689 return;
1690
1691 mutex_lock(&guc->submission_state.lock);
1692 xa_for_each(&guc->submission_state.engine_lookup, index, e)
1693 guc_engine_print(e, p);
1694 mutex_unlock(&guc->submission_state.lock);
1695}