1 // SPDX-License-Identifier: MIT
3 * Copyright © 2014 Intel Corporation
6 #include <linux/circ_buf.h>
8 #include "gem/i915_gem_context.h"
9 #include "gem/i915_gem_lmem.h"
10 #include "gt/gen8_engine_cs.h"
11 #include "gt/intel_breadcrumbs.h"
12 #include "gt/intel_context.h"
13 #include "gt/intel_engine_heartbeat.h"
14 #include "gt/intel_engine_pm.h"
15 #include "gt/intel_engine_regs.h"
16 #include "gt/intel_gpu_commands.h"
17 #include "gt/intel_gt.h"
18 #include "gt/intel_gt_clock_utils.h"
19 #include "gt/intel_gt_irq.h"
20 #include "gt/intel_gt_pm.h"
21 #include "gt/intel_gt_regs.h"
22 #include "gt/intel_gt_requests.h"
23 #include "gt/intel_lrc.h"
24 #include "gt/intel_lrc_reg.h"
25 #include "gt/intel_mocs.h"
26 #include "gt/intel_ring.h"
28 #include "intel_guc_ads.h"
29 #include "intel_guc_capture.h"
30 #include "intel_guc_submission.h"
34 #include "i915_trace.h"
37 * DOC: GuC-based command submission
39 * The Scratch registers:
40 * There are 16 MMIO-based registers starting from 0xC180. The kernel driver writes
41 * a value to the action register (SOFT_SCRATCH_0) along with any data. It then
42 * triggers an interrupt on the GuC via another register write (0xC4C8).
43 * Firmware writes a success/fail code back to the action register after
44 * processing the request. The kernel driver polls waiting for this update and then processes the response.
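 *
 * A rough sketch of one such MMIO exchange (register names as used elsewhere
 * in the driver; the CTB interface described next is what the submission code
 * below actually uses):
 *
 *	1. Write the action code + payload into SOFT_SCRATCH(0..n).
 *	2. Write the interrupt trigger register (0xC4C8) to notify the GuC.
 *	3. Poll SOFT_SCRATCH(0) until the firmware writes back success/fail.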
47 * Command Transport buffers (CTBs):
48 * Covered in detail in other sections but CTBs (Host to GuC - H2G, GuC to Host
49 * - G2H) are a message interface between the i915 and GuC.
51 * Context registration:
52 * Before a context can be submitted it must be registered with the GuC via a
53 * H2G. A unique guc_id is associated with each context. The context is either
54 * registered at request creation time (normal operation) or at submission time
55 * (abnormal operation, e.g. after a reset).
 * Context submission:
58 * The i915 updates the LRC tail value in memory. The i915 must enable the
59 * scheduling of the context within the GuC for the GuC to actually consider it.
60 * Therefore, the first time a disabled context is submitted we use a schedule
61 * enable H2G, while follow-up submissions are done via the context submit H2G,
62 * which informs the GuC that a previously enabled context has new work available.
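 *
 * As a rough sketch (mirroring __guc_add_request() below), the two H2G forms
 * look like:
 *
 *	u32 enable[] = { INTEL_GUC_ACTION_SCHED_CONTEXT_MODE_SET,
 *			 ce->guc_id.id, GUC_CONTEXT_ENABLE };
 *	u32 submit[] = { INTEL_GUC_ACTION_SCHED_CONTEXT, ce->guc_id.id };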
66 * To unpin a context a H2G is used to disable scheduling. When the
67 * corresponding G2H returns indicating the scheduling disable operation has
68 * completed it is safe to unpin the context. While a disable is in flight it
69 * isn't safe to resubmit the context so a fence is used to stall all future
70 * requests of that context until the G2H is returned. Because this interaction
71 * with the GuC takes a non-zero amount of time we delay the disabling of
72 * scheduling after the pin count goes to zero by a configurable period of time
73 * (see SCHED_DISABLE_DELAY_MS). The thought is this gives the user a window of
74 * time to resubmit something on the context before doing this costly operation.
75 * This delay is only done if the context isn't closed and the guc_id usage is
76 * less than a threshold (see NUM_SCHED_DISABLE_GUC_IDS_THRESHOLD).
78 * Context deregistration:
79 * Before a context can be destroyed or if we steal its guc_id we must
80 * deregister the context with the GuC via H2G. If stealing the guc_id it isn't
81 * safe to submit anything to this guc_id until the deregister completes so a
82 * fence is used to stall all requests associated with this guc_id until the
83 * corresponding G2H returns indicating the guc_id has been deregistered.
85 * submission_state.guc_ids:
86 * Unique number associated with private GuC context data passed in during
87 * context registration / submission / deregistration. 64k available. Simple ida
88 * is used for allocation.
91 * If no guc_ids are available they can be stolen from another context at
92 * request creation time if that context is unpinned. If a guc_id can't be found
93 * we punt this problem to the user as we believe this is near impossible to hit
94 * during normal use cases.
97 * In the GuC submission code we have 3 basic spin locks which protect
98 * everything. Details about each below.
 * sched_engine->lock
101 * This is the submission lock for all contexts that share an i915 schedule
102 * engine (sched_engine), thus only one of the contexts which share a
103 * sched_engine can be submitting at a time. Currently only one sched_engine is
104 * used for all of GuC submission but that could change in the future.
106 * guc->submission_state.lock
107 * Global lock for GuC submission state. Protects guc_ids and the destroyed contexts list.
 * ce->guc_state.lock
111 * Protects everything under ce->guc_state. Ensures that a context is in the
112 * correct state before issuing a H2G. e.g. We don't issue a schedule disable
113 * on a disabled context (bad idea), we don't issue a schedule enable when a
114 * schedule disable is in flight, etc... Also protects the list of inflight requests
115 * on the context and the priority management state. The lock is individual to each context.
118 * Lock ordering rules:
119 * sched_engine->lock -> ce->guc_state.lock
120 * guc->submission_state.lock -> ce->guc_state.lock
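 *
 * For example, a legal nesting (as used by __unwind_incomplete_requests()
 * below) looks like:
 *
 *	spin_lock_irqsave(&sched_engine->lock, flags);
 *	spin_lock(&ce->guc_state.lock);
 *	...
 *	spin_unlock(&ce->guc_state.lock);
 *	spin_unlock_irqrestore(&sched_engine->lock, flags);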
123 * When a full GT reset is triggered it is assumed that some G2H responses to
124 * H2Gs can be lost as the GuC is also reset. Losing these G2H can prove to be
125 * fatal as we do certain operations upon receiving a G2H (e.g. destroy
126 * contexts, release guc_ids, etc...). When this occurs we can scrub the
127 * context state and clean up appropriately; however, this is quite racy.
128 * To avoid races, the reset code must disable submission before scrubbing for
129 * the missing G2H, while the submission code must check for submission being
130 * disabled and skip sending H2Gs and updating context states when it is. Both
131 * sides must also make sure to hold the relevant locks.
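 *
 * A minimal sketch of the submission-side check (see submission_disabled()
 * and need_tasklet() below):
 *
 *	if (submission_disabled(guc))
 *		return;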
134 /* GuC Virtual Engine */
135 struct guc_virtual_engine {
136 struct intel_engine_cs base;
137 struct intel_context context;
140 static struct intel_context *
141 guc_create_virtual(struct intel_engine_cs **siblings, unsigned int count,
142 unsigned long flags);
144 static struct intel_context *
145 guc_create_parallel(struct intel_engine_cs **engines,
146 unsigned int num_siblings,
149 #define GUC_REQUEST_SIZE 64 /* bytes */
152 * We reserve 1/16 of the guc_ids for multi-lrc as these need to be contiguous
153 * per the GuC submission interface. A different allocation algorithm is used
154 * (bitmap vs. ida) between multi-lrc and single-lrc hence the reason to
155 * partition the guc_id space. We believe the number of multi-lrc contexts in
156 * use should be low and 1/16 should be sufficient. Minimum of 32 guc_ids for multi-lrc.
159 #define NUMBER_MULTI_LRC_GUC_ID(guc) \
160 ((guc)->submission_state.num_guc_ids / 16)
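/*
 * Worked example (sketch): with roughly 64k guc_ids available,
 * NUMBER_MULTI_LRC_GUC_ID() reserves the bottom ~4k ids for the multi-lrc
 * bitmap allocator, while ida-allocated single-lrc guc_ids start at
 * NUMBER_MULTI_LRC_GUC_ID(guc) (see new_guc_id() below).
 */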
163 * Below is a set of functions which control the GuC scheduling state which require a lock.
166 #define SCHED_STATE_WAIT_FOR_DEREGISTER_TO_REGISTER BIT(0)
167 #define SCHED_STATE_DESTROYED BIT(1)
168 #define SCHED_STATE_PENDING_DISABLE BIT(2)
169 #define SCHED_STATE_BANNED BIT(3)
170 #define SCHED_STATE_ENABLED BIT(4)
171 #define SCHED_STATE_PENDING_ENABLE BIT(5)
172 #define SCHED_STATE_REGISTERED BIT(6)
173 #define SCHED_STATE_POLICY_REQUIRED BIT(7)
174 #define SCHED_STATE_CLOSED BIT(8)
175 #define SCHED_STATE_BLOCKED_SHIFT 9
176 #define SCHED_STATE_BLOCKED BIT(SCHED_STATE_BLOCKED_SHIFT)
177 #define SCHED_STATE_BLOCKED_MASK (0xfff << SCHED_STATE_BLOCKED_SHIFT)
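/*
 * Note (sketch): SCHED_STATE_BLOCKED is really the low bit of a counter
 * occupying bits [9..20] of sched_state. incr_context_blocked() adds
 * SCHED_STATE_BLOCKED, so e.g. two outstanding blocks are stored as
 * 2 << SCHED_STATE_BLOCKED_SHIFT and context_blocked() returns 2.
 */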
179 static inline void init_sched_state(struct intel_context *ce)
181 lockdep_assert_held(&ce->guc_state.lock);
182 ce->guc_state.sched_state &= SCHED_STATE_BLOCKED_MASK;
186 * Kernel contexts can have SCHED_STATE_REGISTERED after suspend.
187 * A context close can race with the submission path, so SCHED_STATE_CLOSED
188 * can be set immediately before we try to register.
190 #define SCHED_STATE_VALID_INIT \
191 (SCHED_STATE_BLOCKED_MASK | \
192 SCHED_STATE_CLOSED | \
193 SCHED_STATE_REGISTERED)
196 static bool sched_state_is_init(struct intel_context *ce)
198 return !(ce->guc_state.sched_state & ~SCHED_STATE_VALID_INIT);
202 context_wait_for_deregister_to_register(struct intel_context *ce)
204 return ce->guc_state.sched_state &
205 SCHED_STATE_WAIT_FOR_DEREGISTER_TO_REGISTER;
209 set_context_wait_for_deregister_to_register(struct intel_context *ce)
211 lockdep_assert_held(&ce->guc_state.lock);
212 ce->guc_state.sched_state |=
213 SCHED_STATE_WAIT_FOR_DEREGISTER_TO_REGISTER;
217 clr_context_wait_for_deregister_to_register(struct intel_context *ce)
219 lockdep_assert_held(&ce->guc_state.lock);
220 ce->guc_state.sched_state &=
221 ~SCHED_STATE_WAIT_FOR_DEREGISTER_TO_REGISTER;
225 context_destroyed(struct intel_context *ce)
227 return ce->guc_state.sched_state & SCHED_STATE_DESTROYED;
231 set_context_destroyed(struct intel_context *ce)
233 lockdep_assert_held(&ce->guc_state.lock);
234 ce->guc_state.sched_state |= SCHED_STATE_DESTROYED;
237 static inline bool context_pending_disable(struct intel_context *ce)
239 return ce->guc_state.sched_state & SCHED_STATE_PENDING_DISABLE;
242 static inline void set_context_pending_disable(struct intel_context *ce)
244 lockdep_assert_held(&ce->guc_state.lock);
245 ce->guc_state.sched_state |= SCHED_STATE_PENDING_DISABLE;
248 static inline void clr_context_pending_disable(struct intel_context *ce)
250 lockdep_assert_held(&ce->guc_state.lock);
251 ce->guc_state.sched_state &= ~SCHED_STATE_PENDING_DISABLE;
254 static inline bool context_banned(struct intel_context *ce)
256 return ce->guc_state.sched_state & SCHED_STATE_BANNED;
259 static inline void set_context_banned(struct intel_context *ce)
261 lockdep_assert_held(&ce->guc_state.lock);
262 ce->guc_state.sched_state |= SCHED_STATE_BANNED;
265 static inline void clr_context_banned(struct intel_context *ce)
267 lockdep_assert_held(&ce->guc_state.lock);
268 ce->guc_state.sched_state &= ~SCHED_STATE_BANNED;
271 static inline bool context_enabled(struct intel_context *ce)
273 return ce->guc_state.sched_state & SCHED_STATE_ENABLED;
276 static inline void set_context_enabled(struct intel_context *ce)
278 lockdep_assert_held(&ce->guc_state.lock);
279 ce->guc_state.sched_state |= SCHED_STATE_ENABLED;
282 static inline void clr_context_enabled(struct intel_context *ce)
284 lockdep_assert_held(&ce->guc_state.lock);
285 ce->guc_state.sched_state &= ~SCHED_STATE_ENABLED;
288 static inline bool context_pending_enable(struct intel_context *ce)
290 return ce->guc_state.sched_state & SCHED_STATE_PENDING_ENABLE;
293 static inline void set_context_pending_enable(struct intel_context *ce)
295 lockdep_assert_held(&ce->guc_state.lock);
296 ce->guc_state.sched_state |= SCHED_STATE_PENDING_ENABLE;
299 static inline void clr_context_pending_enable(struct intel_context *ce)
301 lockdep_assert_held(&ce->guc_state.lock);
302 ce->guc_state.sched_state &= ~SCHED_STATE_PENDING_ENABLE;
305 static inline bool context_registered(struct intel_context *ce)
307 return ce->guc_state.sched_state & SCHED_STATE_REGISTERED;
310 static inline void set_context_registered(struct intel_context *ce)
312 lockdep_assert_held(&ce->guc_state.lock);
313 ce->guc_state.sched_state |= SCHED_STATE_REGISTERED;
316 static inline void clr_context_registered(struct intel_context *ce)
318 lockdep_assert_held(&ce->guc_state.lock);
319 ce->guc_state.sched_state &= ~SCHED_STATE_REGISTERED;
322 static inline bool context_policy_required(struct intel_context *ce)
324 return ce->guc_state.sched_state & SCHED_STATE_POLICY_REQUIRED;
327 static inline void set_context_policy_required(struct intel_context *ce)
329 lockdep_assert_held(&ce->guc_state.lock);
330 ce->guc_state.sched_state |= SCHED_STATE_POLICY_REQUIRED;
333 static inline void clr_context_policy_required(struct intel_context *ce)
335 lockdep_assert_held(&ce->guc_state.lock);
336 ce->guc_state.sched_state &= ~SCHED_STATE_POLICY_REQUIRED;
339 static inline bool context_close_done(struct intel_context *ce)
341 return ce->guc_state.sched_state & SCHED_STATE_CLOSED;
344 static inline void set_context_close_done(struct intel_context *ce)
346 lockdep_assert_held(&ce->guc_state.lock);
347 ce->guc_state.sched_state |= SCHED_STATE_CLOSED;
350 static inline u32 context_blocked(struct intel_context *ce)
352 return (ce->guc_state.sched_state & SCHED_STATE_BLOCKED_MASK) >>
353 SCHED_STATE_BLOCKED_SHIFT;
356 static inline void incr_context_blocked(struct intel_context *ce)
358 lockdep_assert_held(&ce->guc_state.lock);
360 ce->guc_state.sched_state += SCHED_STATE_BLOCKED;
362 GEM_BUG_ON(!context_blocked(ce)); /* Overflow check */
365 static inline void decr_context_blocked(struct intel_context *ce)
367 lockdep_assert_held(&ce->guc_state.lock);
369 GEM_BUG_ON(!context_blocked(ce)); /* Underflow check */
371 ce->guc_state.sched_state -= SCHED_STATE_BLOCKED;
374 static struct intel_context *
375 request_to_scheduling_context(struct i915_request *rq)
377 return intel_context_to_parent(rq->context);
380 static inline bool context_guc_id_invalid(struct intel_context *ce)
382 return ce->guc_id.id == GUC_INVALID_CONTEXT_ID;
385 static inline void set_context_guc_id_invalid(struct intel_context *ce)
387 ce->guc_id.id = GUC_INVALID_CONTEXT_ID;
390 static inline struct intel_guc *ce_to_guc(struct intel_context *ce)
392 return &ce->engine->gt->uc.guc;
395 static inline struct i915_priolist *to_priolist(struct rb_node *rb)
397 return rb_entry(rb, struct i915_priolist, node);
401 * When using multi-lrc submission a scratch memory area is reserved in the
402 * parent's context state for the process descriptor, work queue, and handshake
403 * between the parent + children contexts to insert safe preemption points
404 * between each of the BBs. Currently the scratch area is sized to a page.
406 * The layout of this scratch area is below:
408 * + sizeof(struct guc_process_desc) child go
409 * + CACHELINE_BYTES child join[0]
411 * + CACHELINE_BYTES child join[n - 1]
413 * PARENT_SCRATCH_SIZE / 2 work queue start
415 * PARENT_SCRATCH_SIZE - 1 work queue end
417 #define WQ_SIZE (PARENT_SCRATCH_SIZE / 2)
418 #define WQ_OFFSET (PARENT_SCRATCH_SIZE - WQ_SIZE)
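/*
 * For example, assuming PARENT_SCRATCH_SIZE is a single 4K page (as the
 * comment above states), WQ_SIZE and WQ_OFFSET both evaluate to 2K: the
 * descriptor and semaphores occupy the first half of the scratch area and
 * the work queue ring the second half.
 */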
420 struct sync_semaphore {
422 u8 unused[CACHELINE_BYTES - sizeof(u32)];
425 struct parent_scratch {
	union guc_descs {
427 struct guc_sched_wq_desc wq_desc;
428 struct guc_process_desc_v69 pdesc;
	} descs;
431 struct sync_semaphore go;
432 struct sync_semaphore join[MAX_ENGINE_INSTANCE + 1];
434 u8 unused[WQ_OFFSET - sizeof(union guc_descs) -
435 sizeof(struct sync_semaphore) * (MAX_ENGINE_INSTANCE + 2)];
437 u32 wq[WQ_SIZE / sizeof(u32)];
440 static u32 __get_parent_scratch_offset(struct intel_context *ce)
442 GEM_BUG_ON(!ce->parallel.guc.parent_page);
444 return ce->parallel.guc.parent_page * PAGE_SIZE;
447 static u32 __get_wq_offset(struct intel_context *ce)
449 BUILD_BUG_ON(offsetof(struct parent_scratch, wq) != WQ_OFFSET);
451 return __get_parent_scratch_offset(ce) + WQ_OFFSET;
454 static struct parent_scratch *
455 __get_parent_scratch(struct intel_context *ce)
457 BUILD_BUG_ON(sizeof(struct parent_scratch) != PARENT_SCRATCH_SIZE);
458 BUILD_BUG_ON(sizeof(struct sync_semaphore) != CACHELINE_BYTES);
461 * Need to subtract LRC_STATE_OFFSET here as the
462 * parallel.guc.parent_page is the offset into ce->state while
463 * ce->lrc_reg_state is ce->state + LRC_STATE_OFFSET.
465 return (struct parent_scratch *)
		(ce->lrc_reg_state +
467 ((__get_parent_scratch_offset(ce) -
468 LRC_STATE_OFFSET) / sizeof(u32)));
471 static struct guc_process_desc_v69 *
472 __get_process_desc_v69(struct intel_context *ce)
474 struct parent_scratch *ps = __get_parent_scratch(ce);
476 return &ps->descs.pdesc;
479 static struct guc_sched_wq_desc *
480 __get_wq_desc_v70(struct intel_context *ce)
482 struct parent_scratch *ps = __get_parent_scratch(ce);
484 return &ps->descs.wq_desc;
487 static u32 *get_wq_pointer(struct intel_context *ce, u32 wqi_size)
490 * Check for space in the work queue. Caching a value of the head pointer in the
491 * intel_context structure in order to reduce the number of accesses to shared
492 * GPU memory, which may be across a PCIe bus.
494 #define AVAILABLE_SPACE \
495 CIRC_SPACE(ce->parallel.guc.wqi_tail, ce->parallel.guc.wqi_head, WQ_SIZE)
496 if (wqi_size > AVAILABLE_SPACE) {
497 ce->parallel.guc.wqi_head = READ_ONCE(*ce->parallel.guc.wq_head);
499 if (wqi_size > AVAILABLE_SPACE)
			return NULL;
502 #undef AVAILABLE_SPACE
504 return &__get_parent_scratch(ce)->wq[ce->parallel.guc.wqi_tail / sizeof(u32)];
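/*
 * Note on the CIRC_SPACE() use above (sketch): the work queue follows the
 * usual circ_buf convention of keeping one byte free, so an empty queue
 * (wqi_head == wqi_tail) reports WQ_SIZE - 1 bytes of space and the queue
 * is full when the write pointer (wqi_tail) is one byte behind the head.
 */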
507 static inline struct intel_context *__get_context(struct intel_guc *guc, u32 id)
509 struct intel_context *ce = xa_load(&guc->context_lookup, id);
511 GEM_BUG_ON(id >= GUC_MAX_CONTEXT_ID);
516 static struct guc_lrc_desc_v69 *__get_lrc_desc_v69(struct intel_guc *guc, u32 index)
518 struct guc_lrc_desc_v69 *base = guc->lrc_desc_pool_vaddr_v69;
523 GEM_BUG_ON(index >= GUC_MAX_CONTEXT_ID);
528 static int guc_lrc_desc_pool_create_v69(struct intel_guc *guc)
533 size = PAGE_ALIGN(sizeof(struct guc_lrc_desc_v69) *
535 ret = intel_guc_allocate_and_map_vma(guc, size, &guc->lrc_desc_pool_v69,
536 (void **)&guc->lrc_desc_pool_vaddr_v69);
543 static void guc_lrc_desc_pool_destroy_v69(struct intel_guc *guc)
545 if (!guc->lrc_desc_pool_vaddr_v69)
548 guc->lrc_desc_pool_vaddr_v69 = NULL;
549 i915_vma_unpin_and_release(&guc->lrc_desc_pool_v69, I915_VMA_RELEASE_MAP);
552 static inline bool guc_submission_initialized(struct intel_guc *guc)
554 return guc->submission_initialized;
557 static inline void _reset_lrc_desc_v69(struct intel_guc *guc, u32 id)
559 struct guc_lrc_desc_v69 *desc = __get_lrc_desc_v69(guc, id);
562 memset(desc, 0, sizeof(*desc));
565 static inline bool ctx_id_mapped(struct intel_guc *guc, u32 id)
567 return __get_context(guc, id);
570 static inline void set_ctx_id_mapping(struct intel_guc *guc, u32 id,
571 struct intel_context *ce)
576 * xarray API doesn't have an xa_store_irqsave wrapper, so calling the
577 * lower level functions directly.
579 xa_lock_irqsave(&guc->context_lookup, flags);
580 __xa_store(&guc->context_lookup, id, ce, GFP_ATOMIC);
581 xa_unlock_irqrestore(&guc->context_lookup, flags);
584 static inline void clr_ctx_id_mapping(struct intel_guc *guc, u32 id)
588 if (unlikely(!guc_submission_initialized(guc)))
591 _reset_lrc_desc_v69(guc, id);
594 * xarray API doesn't have xa_erase_irqsave wrapper, so calling
595 * the lower level functions directly.
597 xa_lock_irqsave(&guc->context_lookup, flags);
598 __xa_erase(&guc->context_lookup, id);
599 xa_unlock_irqrestore(&guc->context_lookup, flags);
602 static void decr_outstanding_submission_g2h(struct intel_guc *guc)
604 if (atomic_dec_and_test(&guc->outstanding_submission_g2h))
605 wake_up_all(&guc->ct.wq);
608 static int guc_submission_send_busy_loop(struct intel_guc *guc,
615 * We always loop when a send requires a reply (i.e. g2h_len_dw > 0),
616 * so we don't handle the case where we don't get a reply because we
617 * aborted the send due to the channel being busy.
619 GEM_BUG_ON(g2h_len_dw && !loop);
622 atomic_inc(&guc->outstanding_submission_g2h);
624 return intel_guc_send_busy_loop(guc, action, len, g2h_len_dw, loop);
627 int intel_guc_wait_for_pending_msg(struct intel_guc *guc,
632 const int state = interruptible ?
633 TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE;
637 GEM_BUG_ON(timeout < 0);
639 if (!atomic_read(wait_var))
646 prepare_to_wait(&guc->ct.wq, &wait, state);
648 if (!atomic_read(wait_var))
651 if (signal_pending_state(state, current)) {
661 timeout = io_schedule_timeout(timeout);
663 finish_wait(&guc->ct.wq, &wait);
665 return (timeout < 0) ? timeout : 0;
668 int intel_guc_wait_for_idle(struct intel_guc *guc, long timeout)
670 if (!intel_uc_uses_guc_submission(&guc_to_gt(guc)->uc))
673 return intel_guc_wait_for_pending_msg(guc,
674 &guc->outstanding_submission_g2h,
678 static int guc_context_policy_init_v70(struct intel_context *ce, bool loop);
679 static int try_context_registration(struct intel_context *ce, bool loop);
681 static int __guc_add_request(struct intel_guc *guc, struct i915_request *rq)
684 struct intel_context *ce = request_to_scheduling_context(rq);
690 lockdep_assert_held(&rq->engine->sched_engine->lock);
693 * Corner case where requests were sitting in the priority list or a
694 * request was resubmitted after the context was banned.
696 if (unlikely(!intel_context_is_schedulable(ce))) {
697 i915_request_put(i915_request_mark_eio(rq));
698 intel_engine_signal_breadcrumbs(ce->engine);
702 GEM_BUG_ON(!atomic_read(&ce->guc_id.ref));
703 GEM_BUG_ON(context_guc_id_invalid(ce));
705 if (context_policy_required(ce)) {
706 err = guc_context_policy_init_v70(ce, false);
711 spin_lock(&ce->guc_state.lock);
714 * The request / context will be run on the hardware when scheduling
715 * gets enabled in the unblock. For multi-lrc we still submit the
716 * context to move the LRC tails.
718 if (unlikely(context_blocked(ce) && !intel_context_is_parent(ce)))
721 enabled = context_enabled(ce) || context_blocked(ce);
724 action[len++] = INTEL_GUC_ACTION_SCHED_CONTEXT_MODE_SET;
725 action[len++] = ce->guc_id.id;
726 action[len++] = GUC_CONTEXT_ENABLE;
727 set_context_pending_enable(ce);
728 intel_context_get(ce);
729 g2h_len_dw = G2H_LEN_DW_SCHED_CONTEXT_MODE_SET;
731 action[len++] = INTEL_GUC_ACTION_SCHED_CONTEXT;
732 action[len++] = ce->guc_id.id;
735 err = intel_guc_send_nb(guc, action, len, g2h_len_dw);
736 if (!enabled && !err) {
737 trace_intel_context_sched_enable(ce);
738 atomic_inc(&guc->outstanding_submission_g2h);
739 set_context_enabled(ce);
742 * Without multi-lrc KMD does the submission step (moving the
743 * lrc tail) so enabling scheduling is sufficient to submit the
744 * context. This isn't the case in multi-lrc submission as the
745 * GuC needs to move the tails, hence the need for another H2G
746 * to submit a multi-lrc context after enabling scheduling.
748 if (intel_context_is_parent(ce)) {
749 action[0] = INTEL_GUC_ACTION_SCHED_CONTEXT;
750 err = intel_guc_send_nb(guc, action, len - 1, 0);
752 } else if (!enabled) {
753 clr_context_pending_enable(ce);
754 intel_context_put(ce);
757 trace_i915_request_guc_submit(rq);
760 spin_unlock(&ce->guc_state.lock);
764 static int guc_add_request(struct intel_guc *guc, struct i915_request *rq)
766 int ret = __guc_add_request(guc, rq);
768 if (unlikely(ret == -EBUSY)) {
769 guc->stalled_request = rq;
770 guc->submission_stall_reason = STALL_ADD_REQUEST;
776 static inline void guc_set_lrc_tail(struct i915_request *rq)
778 rq->context->lrc_reg_state[CTX_RING_TAIL] =
779 intel_ring_set_tail(rq->ring, rq->tail);
782 static inline int rq_prio(const struct i915_request *rq)
784 return rq->sched.attr.priority;
787 static bool is_multi_lrc_rq(struct i915_request *rq)
789 return intel_context_is_parallel(rq->context);
792 static bool can_merge_rq(struct i915_request *rq,
793 struct i915_request *last)
795 return request_to_scheduling_context(rq) ==
796 request_to_scheduling_context(last);
799 static u32 wq_space_until_wrap(struct intel_context *ce)
801 return (WQ_SIZE - ce->parallel.guc.wqi_tail);
804 static void write_wqi(struct intel_context *ce, u32 wqi_size)
806 BUILD_BUG_ON(!is_power_of_2(WQ_SIZE));
809 * Ensure WQI are visible before updating tail
811 intel_guc_write_barrier(ce_to_guc(ce));
813 ce->parallel.guc.wqi_tail = (ce->parallel.guc.wqi_tail + wqi_size) &
815 WRITE_ONCE(*ce->parallel.guc.wq_tail, ce->parallel.guc.wqi_tail);
818 static int guc_wq_noop_append(struct intel_context *ce)
820 u32 *wqi = get_wq_pointer(ce, wq_space_until_wrap(ce));
821 u32 len_dw = wq_space_until_wrap(ce) / sizeof(u32) - 1;
826 GEM_BUG_ON(!FIELD_FIT(WQ_LEN_MASK, len_dw));
828 *wqi = FIELD_PREP(WQ_TYPE_MASK, WQ_TYPE_NOOP) |
829 FIELD_PREP(WQ_LEN_MASK, len_dw);
830 ce->parallel.guc.wqi_tail = 0;
835 static int __guc_wq_item_append(struct i915_request *rq)
837 struct intel_context *ce = request_to_scheduling_context(rq);
838 struct intel_context *child;
839 unsigned int wqi_size = (ce->parallel.number_children + 4) *
842 u32 len_dw = (wqi_size / sizeof(u32)) - 1;
845 /* Ensure the context is in the correct state before updating the work queue */
846 GEM_BUG_ON(!atomic_read(&ce->guc_id.ref));
847 GEM_BUG_ON(context_guc_id_invalid(ce));
848 GEM_BUG_ON(context_wait_for_deregister_to_register(ce));
849 GEM_BUG_ON(!ctx_id_mapped(ce_to_guc(ce), ce->guc_id.id));
851 /* Insert NOOP if this work queue item will wrap the tail pointer. */
852 if (wqi_size > wq_space_until_wrap(ce)) {
853 ret = guc_wq_noop_append(ce);
858 wqi = get_wq_pointer(ce, wqi_size);
862 GEM_BUG_ON(!FIELD_FIT(WQ_LEN_MASK, len_dw));
864 *wqi++ = FIELD_PREP(WQ_TYPE_MASK, WQ_TYPE_MULTI_LRC) |
865 FIELD_PREP(WQ_LEN_MASK, len_dw);
866 *wqi++ = ce->lrc.lrca;
867 *wqi++ = FIELD_PREP(WQ_GUC_ID_MASK, ce->guc_id.id) |
868 FIELD_PREP(WQ_RING_TAIL_MASK, ce->ring->tail / sizeof(u64));
869 *wqi++ = 0; /* fence_id */
870 for_each_child(ce, child)
871 *wqi++ = child->ring->tail / sizeof(u64);
873 write_wqi(ce, wqi_size);
878 static int guc_wq_item_append(struct intel_guc *guc,
879 struct i915_request *rq)
881 struct intel_context *ce = request_to_scheduling_context(rq);
884 if (unlikely(!intel_context_is_schedulable(ce)))
887 ret = __guc_wq_item_append(rq);
888 if (unlikely(ret == -EBUSY)) {
889 guc->stalled_request = rq;
890 guc->submission_stall_reason = STALL_MOVE_LRC_TAIL;
896 static bool multi_lrc_submit(struct i915_request *rq)
898 struct intel_context *ce = request_to_scheduling_context(rq);
900 intel_ring_set_tail(rq->ring, rq->tail);
903 * We expect the front end (execbuf IOCTL) to set this flag on the last
904 * request generated from a multi-BB submission. This indicates to the
905 * backend (GuC interface) that we should submit this context, thus
906 * submitting all the requests generated in parallel.
908 return test_bit(I915_FENCE_FLAG_SUBMIT_PARALLEL, &rq->fence.flags) ||
909 !intel_context_is_schedulable(ce);
912 static int guc_dequeue_one_context(struct intel_guc *guc)
914 struct i915_sched_engine * const sched_engine = guc->sched_engine;
915 struct i915_request *last = NULL;
920 lockdep_assert_held(&sched_engine->lock);
922 if (guc->stalled_request) {
924 last = guc->stalled_request;
926 switch (guc->submission_stall_reason) {
927 case STALL_REGISTER_CONTEXT:
928 goto register_context;
929 case STALL_MOVE_LRC_TAIL:
931 case STALL_ADD_REQUEST:
934 MISSING_CASE(guc->submission_stall_reason);
938 while ((rb = rb_first_cached(&sched_engine->queue))) {
939 struct i915_priolist *p = to_priolist(rb);
940 struct i915_request *rq, *rn;
942 priolist_for_each_request_consume(rq, rn, p) {
943 if (last && !can_merge_rq(rq, last))
944 goto register_context;
946 list_del_init(&rq->sched.link);
948 __i915_request_submit(rq);
950 trace_i915_request_in(rq, 0);
953 if (is_multi_lrc_rq(rq)) {
955 * We need to coalesce all multi-lrc requests in
956 * a relationship into a single H2G. We are
957 * guaranteed that all of these requests will be
958 * submitted sequentially.
960 if (multi_lrc_submit(rq)) {
962 goto register_context;
969 rb_erase_cached(&p->node, &sched_engine->queue);
970 i915_priolist_free(p);
975 struct intel_context *ce = request_to_scheduling_context(last);
977 if (unlikely(!ctx_id_mapped(guc, ce->guc_id.id) &&
978 intel_context_is_schedulable(ce))) {
979 ret = try_context_registration(ce, false);
980 if (unlikely(ret == -EPIPE)) {
982 } else if (ret == -EBUSY) {
983 guc->stalled_request = last;
984 guc->submission_stall_reason =
985 STALL_REGISTER_CONTEXT;
986 goto schedule_tasklet;
987 } else if (ret != 0) {
988 GEM_WARN_ON(ret); /* Unexpected */
994 if (is_multi_lrc_rq(last)) {
995 ret = guc_wq_item_append(guc, last);
997 goto schedule_tasklet;
998 } else if (ret != 0) {
999 GEM_WARN_ON(ret); /* Unexpected */
1003 guc_set_lrc_tail(last);
1007 ret = guc_add_request(guc, last);
1008 if (unlikely(ret == -EPIPE)) {
1010 } else if (ret == -EBUSY) {
1011 goto schedule_tasklet;
1012 } else if (ret != 0) {
1013 GEM_WARN_ON(ret); /* Unexpected */
1018 guc->stalled_request = NULL;
1019 guc->submission_stall_reason = STALL_NONE;
1023 sched_engine->tasklet.callback = NULL;
1024 tasklet_disable_nosync(&sched_engine->tasklet);
1028 tasklet_schedule(&sched_engine->tasklet);
1032 static void guc_submission_tasklet(struct tasklet_struct *t)
1034 struct i915_sched_engine *sched_engine =
1035 from_tasklet(sched_engine, t, tasklet);
1036 unsigned long flags;
1039 spin_lock_irqsave(&sched_engine->lock, flags);
1042 loop = guc_dequeue_one_context(sched_engine->private_data);
1045 i915_sched_engine_reset_on_empty(sched_engine);
1047 spin_unlock_irqrestore(&sched_engine->lock, flags);
1050 static void cs_irq_handler(struct intel_engine_cs *engine, u16 iir)
1052 if (iir & GT_RENDER_USER_INTERRUPT)
1053 intel_engine_signal_breadcrumbs(engine);
1056 static void __guc_context_destroy(struct intel_context *ce);
1057 static void release_guc_id(struct intel_guc *guc, struct intel_context *ce);
1058 static void guc_signal_context_fence(struct intel_context *ce);
1059 static void guc_cancel_context_requests(struct intel_context *ce);
1060 static void guc_blocked_fence_complete(struct intel_context *ce);
1062 static void scrub_guc_desc_for_outstanding_g2h(struct intel_guc *guc)
1064 struct intel_context *ce;
1065 unsigned long index, flags;
1066 bool pending_disable, pending_enable, deregister, destroyed, banned;
1068 xa_lock_irqsave(&guc->context_lookup, flags);
1069 xa_for_each(&guc->context_lookup, index, ce) {
1071 * Corner case where the ref count on the object is zero but a
1072 * deregister G2H was lost. In this case we don't touch the ref
1073 * count and finish the destroy of the context.
1075 bool do_put = kref_get_unless_zero(&ce->ref);
1077 xa_unlock(&guc->context_lookup);
1079 if (test_bit(CONTEXT_GUC_INIT, &ce->flags) &&
1080 (cancel_delayed_work(&ce->guc_state.sched_disable_delay_work))) {
1081 /* successful cancel so jump straight to close it */
1082 intel_context_sched_disable_unpin(ce);
1085 spin_lock(&ce->guc_state.lock);
1088 * Once we are at this point submission_disabled() is guaranteed
1089 * to be visible to all callers who set the below flags (see above
1090 * flush and flushes in reset_prepare). If submission_disabled()
1091 * is set, the caller shouldn't set these flags.
1094 destroyed = context_destroyed(ce);
1095 pending_enable = context_pending_enable(ce);
1096 pending_disable = context_pending_disable(ce);
1097 deregister = context_wait_for_deregister_to_register(ce);
1098 banned = context_banned(ce);
1099 init_sched_state(ce);
1101 spin_unlock(&ce->guc_state.lock);
1103 if (pending_enable || destroyed || deregister) {
1104 decr_outstanding_submission_g2h(guc);
1106 guc_signal_context_fence(ce);
1108 intel_gt_pm_put_async(guc_to_gt(guc));
1109 release_guc_id(guc, ce);
1110 __guc_context_destroy(ce);
1112 if (pending_enable || deregister)
1113 intel_context_put(ce);
1116 /* Not mutually exclusive with the above if statement. */
1117 if (pending_disable) {
1118 guc_signal_context_fence(ce);
1120 guc_cancel_context_requests(ce);
1121 intel_engine_signal_breadcrumbs(ce->engine);
1123 intel_context_sched_disable_unpin(ce);
1124 decr_outstanding_submission_g2h(guc);
1126 spin_lock(&ce->guc_state.lock);
1127 guc_blocked_fence_complete(ce);
1128 spin_unlock(&ce->guc_state.lock);
1130 intel_context_put(ce);
1134 intel_context_put(ce);
1135 xa_lock(&guc->context_lookup);
1137 xa_unlock_irqrestore(&guc->context_lookup, flags);
1141 * GuC stores busyness stats for each engine at context in/out boundaries. A
1142 * context 'in' logs execution start time, 'out' adds in -> out delta to total.
1143 * i915/kmd accesses 'start', 'total' and 'context id' from memory shared with the GuC.
1146 * __i915_pmu_event_read samples engine busyness. When sampling, if context id
1147 * is valid (!= ~0) and start is non-zero, the engine is considered to be
1148 * active. For an active engine total busyness = total + (now - start), where
1149 * 'now' is the time at which the busyness is sampled. For inactive engine,
1150 * total busyness = total.
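 *
 * For example (sketch): with total = 1000 gt clock ticks, start = 500 and a
 * sampled gt timestamp of 700, an active engine accumulates 1000 +
 * (700 - 500) = 1200 ticks of busyness, which guc_engine_busyness() below
 * converts to ns via intel_gt_clock_interval_to_ns().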
1152 * All times are captured from GUCPMTIMESTAMP reg and are in gt clock domain.
1154 * The start and total values provided by GuC are 32 bits and wrap around in a
1155 * few minutes. Since perf pmu provides busyness as 64 bit monotonically
1156 * increasing ns values, there is a need for this implementation to account for
1157 * overflows and extend the GuC provided values to 64 bits before returning
1158 * busyness to the user. In order to do that, a worker runs periodically with a
1159 * period of 1/8th the time it takes for the timestamp to wrap (i.e. once every
1160 * 27 seconds for a gt clock frequency of 19.2 MHz).
1163 #define WRAP_TIME_CLKS U32_MAX
1164 #define POLL_TIME_CLKS (WRAP_TIME_CLKS >> 3)
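/*
 * Worked example: at a 19.2 MHz gt clock, WRAP_TIME_CLKS (~2^32 ticks) spans
 * roughly 224 seconds, so POLL_TIME_CLKS corresponds to ~28 seconds, which is
 * where the "once every 27 seconds" figure in the comment above comes from.
 * The worker's ping_delay is derived from POLL_TIME_CLKS in
 * intel_guc_submission_init().
 */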
1167 __extend_last_switch(struct intel_guc *guc, u64 *prev_start, u32 new_start)
1169 u32 gt_stamp_hi = upper_32_bits(guc->timestamp.gt_stamp);
1170 u32 gt_stamp_last = lower_32_bits(guc->timestamp.gt_stamp);
1172 if (new_start == lower_32_bits(*prev_start))
1176 * When gt is unparked, we update the gt timestamp and start the ping
1177 * worker that updates the gt_stamp every POLL_TIME_CLKS. As long as gt
1178 * is unparked, all switched in contexts will have a start time that is
1179 * within +/- POLL_TIME_CLKS of the most recent gt_stamp.
1181 * If neither gt_stamp nor new_start has rolled over, then the
1182 * gt_stamp_hi does not need to be adjusted, however if one of them has
1183 * rolled over, we need to adjust gt_stamp_hi accordingly.
1185 * The below conditions address the cases of new_start rollover and
1186 * gt_stamp_last rollover respectively.
1188 if (new_start < gt_stamp_last &&
1189 (new_start - gt_stamp_last) <= POLL_TIME_CLKS)
		gt_stamp_hi++;
1192 if (new_start > gt_stamp_last &&
1193 (gt_stamp_last - new_start) <= POLL_TIME_CLKS && gt_stamp_hi)
		gt_stamp_hi--;
1196 *prev_start = ((u64)gt_stamp_hi << 32) | new_start;
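/*
 * Example of the rollover handling above (sketch): if gt_stamp is
 * 0x00000005_00000100 (gt_stamp_hi == 5) and the GuC reports a switch-in
 * stamp of 0xffffff00, the switch happened just before the lower 32 bits
 * wrapped, so gt_stamp_hi is decremented and the extended start time becomes
 * 0x00000004_ffffff00.
 */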
1199 #define record_read(map_, field_) \
1200 iosys_map_rd_field(map_, 0, struct guc_engine_usage_record, field_)
1203 * GuC updates shared memory and KMD reads it. Since this is not synchronized,
1204 * we run into a race where the value read is inconsistent. Sometimes the
1205 * inconsistency is in reading the upper MSB bytes of the last_in value when
1206 * this race occurs. 2 types of cases are seen - upper 8 bits are zero and upper
1207 * 24 bits are zero. Since these are non-zero values, it is non-trivial to
1208 * determine validity of these values. Instead we read the values multiple times
1209 * until they are consistent. In test runs, 3 attempts result in consistent
1210 * values. The upper bound is set to 6 attempts and may need to be tuned as per
1211 * any new occurrences.
1213 static void __get_engine_usage_record(struct intel_engine_cs *engine,
1214 u32 *last_in, u32 *id, u32 *total)
1216 struct iosys_map rec_map = intel_guc_engine_usage_record_map(engine);
1220 *last_in = record_read(&rec_map, last_switch_in_stamp);
1221 *id = record_read(&rec_map, current_context_index);
1222 *total = record_read(&rec_map, total_runtime);
1224 if (record_read(&rec_map, last_switch_in_stamp) == *last_in &&
1225 record_read(&rec_map, current_context_index) == *id &&
1226 record_read(&rec_map, total_runtime) == *total)
1231 static void guc_update_engine_gt_clks(struct intel_engine_cs *engine)
1233 struct intel_engine_guc_stats *stats = &engine->stats.guc;
1234 struct intel_guc *guc = &engine->gt->uc.guc;
1235 u32 last_switch, ctx_id, total;
1237 lockdep_assert_held(&guc->timestamp.lock);
1239 __get_engine_usage_record(engine, &last_switch, &ctx_id, &total);
1241 stats->running = ctx_id != ~0U && last_switch;
1243 __extend_last_switch(guc, &stats->start_gt_clk, last_switch);
1246 * Instead of adjusting the total for overflow, just add the
1247 * difference from the previous sample to stats->total_gt_clks.
1249 if (total && total != ~0U) {
1250 stats->total_gt_clks += (u32)(total - stats->prev_total);
1251 stats->prev_total = total;
1255 static u32 gpm_timestamp_shift(struct intel_gt *gt)
1257 intel_wakeref_t wakeref;
1260 with_intel_runtime_pm(gt->uncore->rpm, wakeref)
1261 reg = intel_uncore_read(gt->uncore, RPM_CONFIG0);
1263 shift = (reg & GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_MASK) >>
1264 GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_SHIFT;
1269 static void guc_update_pm_timestamp(struct intel_guc *guc, ktime_t *now)
1271 struct intel_gt *gt = guc_to_gt(guc);
1272 u32 gt_stamp_lo, gt_stamp_hi;
1275 lockdep_assert_held(&guc->timestamp.lock);
1277 gt_stamp_hi = upper_32_bits(guc->timestamp.gt_stamp);
1278 gpm_ts = intel_uncore_read64_2x32(gt->uncore, MISC_STATUS0,
1279 MISC_STATUS1) >> guc->timestamp.shift;
1280 gt_stamp_lo = lower_32_bits(gpm_ts);
1283 if (gt_stamp_lo < lower_32_bits(guc->timestamp.gt_stamp))
1286 guc->timestamp.gt_stamp = ((u64)gt_stamp_hi << 32) | gt_stamp_lo;
1290 * Unlike the execlist mode of submission, total and active times are in terms of
1291 * gt clocks. The *now parameter is retained to return the cpu time at which the
1292 * busyness was sampled.
1294 static ktime_t guc_engine_busyness(struct intel_engine_cs *engine, ktime_t *now)
1296 struct intel_engine_guc_stats stats_saved, *stats = &engine->stats.guc;
1297 struct i915_gpu_error *gpu_error = &engine->i915->gpu_error;
1298 struct intel_gt *gt = engine->gt;
1299 struct intel_guc *guc = >->uc.guc;
1300 u64 total, gt_stamp_saved;
1301 unsigned long flags;
1305 spin_lock_irqsave(&guc->timestamp.lock, flags);
1308 * If a reset happened, we risk reading partially updated engine
1309 * busyness from GuC, so we just use the driver stored copy of busyness.
1310 * Synchronize with gt reset using reset_count and the
1311 * I915_RESET_BACKOFF flag. Note that reset flow updates the reset_count
1312 * after I915_RESET_BACKOFF flag, so ensure that the reset_count is
1313 * usable by checking the flag afterwards.
1315 reset_count = i915_reset_count(gpu_error);
1316 in_reset = test_bit(I915_RESET_BACKOFF, >->reset.flags);
1321 * The active busyness depends on start_gt_clk and gt_stamp.
1322 * gt_stamp is updated by i915 only when gt is awake and the
1323 * start_gt_clk is derived from GuC state. To get a consistent
1324 * view of activity, we query the GuC state only if gt is awake.
1326 if (!in_reset && intel_gt_pm_get_if_awake(gt)) {
1327 stats_saved = *stats;
1328 gt_stamp_saved = guc->timestamp.gt_stamp;
1330 * Update gt_clks, then gt timestamp to simplify the 'gt_stamp -
1331 * start_gt_clk' calculation below for active engines.
1333 guc_update_engine_gt_clks(engine);
1334 guc_update_pm_timestamp(guc, now);
1335 intel_gt_pm_put_async(gt);
1336 if (i915_reset_count(gpu_error) != reset_count) {
1337 *stats = stats_saved;
1338 guc->timestamp.gt_stamp = gt_stamp_saved;
1342 total = intel_gt_clock_interval_to_ns(gt, stats->total_gt_clks);
1343 if (stats->running) {
1344 u64 clk = guc->timestamp.gt_stamp - stats->start_gt_clk;
1346 total += intel_gt_clock_interval_to_ns(gt, clk);
1349 spin_unlock_irqrestore(&guc->timestamp.lock, flags);
1351 return ns_to_ktime(total);
1354 static void __reset_guc_busyness_stats(struct intel_guc *guc)
1356 struct intel_gt *gt = guc_to_gt(guc);
1357 struct intel_engine_cs *engine;
1358 enum intel_engine_id id;
1359 unsigned long flags;
1362 cancel_delayed_work_sync(&guc->timestamp.work);
1364 spin_lock_irqsave(&guc->timestamp.lock, flags);
1366 guc_update_pm_timestamp(guc, &unused);
1367 for_each_engine(engine, gt, id) {
1368 guc_update_engine_gt_clks(engine);
1369 engine->stats.guc.prev_total = 0;
1372 spin_unlock_irqrestore(&guc->timestamp.lock, flags);
1375 static void __update_guc_busyness_stats(struct intel_guc *guc)
1377 struct intel_gt *gt = guc_to_gt(guc);
1378 struct intel_engine_cs *engine;
1379 enum intel_engine_id id;
1380 unsigned long flags;
1383 guc->timestamp.last_stat_jiffies = jiffies;
1385 spin_lock_irqsave(&guc->timestamp.lock, flags);
1387 guc_update_pm_timestamp(guc, &unused);
1388 for_each_engine(engine, gt, id)
1389 guc_update_engine_gt_clks(engine);
1391 spin_unlock_irqrestore(&guc->timestamp.lock, flags);
1394 static void guc_timestamp_ping(struct work_struct *wrk)
1396 struct intel_guc *guc = container_of(wrk, typeof(*guc),
1397 timestamp.work.work);
1398 struct intel_uc *uc = container_of(guc, typeof(*uc), guc);
1399 struct intel_gt *gt = guc_to_gt(guc);
1400 intel_wakeref_t wakeref;
1404 * Synchronize with gt reset to make sure the worker does not
1405 * corrupt the engine/guc stats. NB: can't actually block waiting
1406 * for a reset to complete as the reset requires flushing out
1407 * this worker thread if started. So waiting would deadlock.
1409 ret = intel_gt_reset_trylock(gt, &srcu);
1413 with_intel_runtime_pm(>->i915->runtime_pm, wakeref)
1414 __update_guc_busyness_stats(guc);
1416 intel_gt_reset_unlock(gt, srcu);
1418 mod_delayed_work(system_highpri_wq, &guc->timestamp.work,
1419 guc->timestamp.ping_delay);
1422 static int guc_action_enable_usage_stats(struct intel_guc *guc)
1424 u32 offset = intel_guc_engine_usage_offset(guc);
1426 INTEL_GUC_ACTION_SET_ENG_UTIL_BUFF,
1431 return intel_guc_send(guc, action, ARRAY_SIZE(action));
1434 static void guc_init_engine_stats(struct intel_guc *guc)
1436 struct intel_gt *gt = guc_to_gt(guc);
1437 intel_wakeref_t wakeref;
1439 mod_delayed_work(system_highpri_wq, &guc->timestamp.work,
1440 guc->timestamp.ping_delay);
1442 with_intel_runtime_pm(>->i915->runtime_pm, wakeref) {
1443 int ret = guc_action_enable_usage_stats(guc);
1446 drm_err(>->i915->drm,
1447 "Failed to enable usage stats: %d!\n", ret);
1451 void intel_guc_busyness_park(struct intel_gt *gt)
1453 struct intel_guc *guc = >->uc.guc;
1455 if (!guc_submission_initialized(guc))
1459 * There is a race with suspend flow where the worker runs after suspend
1460 * and causes an unclaimed register access warning. Cancel the worker
1461 * synchronously here.
1463 cancel_delayed_work_sync(&guc->timestamp.work);
1466 * Before parking, we should sample engine busyness stats if we need to.
1467 * We can skip it if we are less than half a ping from the last time we
1468 * sampled the busyness stats.
1470 if (guc->timestamp.last_stat_jiffies &&
1471 !time_after(jiffies, guc->timestamp.last_stat_jiffies +
1472 (guc->timestamp.ping_delay / 2)))
1475 __update_guc_busyness_stats(guc);
1478 void intel_guc_busyness_unpark(struct intel_gt *gt)
1480 struct intel_guc *guc = >->uc.guc;
1481 unsigned long flags;
1484 if (!guc_submission_initialized(guc))
1487 spin_lock_irqsave(&guc->timestamp.lock, flags);
1488 guc_update_pm_timestamp(guc, &unused);
1489 spin_unlock_irqrestore(&guc->timestamp.lock, flags);
1490 mod_delayed_work(system_highpri_wq, &guc->timestamp.work,
1491 guc->timestamp.ping_delay);
1495 submission_disabled(struct intel_guc *guc)
1497 struct i915_sched_engine * const sched_engine = guc->sched_engine;
1499 return unlikely(!sched_engine ||
1500 !__tasklet_is_enabled(&sched_engine->tasklet) ||
1501 intel_gt_is_wedged(guc_to_gt(guc)));
1504 static void disable_submission(struct intel_guc *guc)
1506 struct i915_sched_engine * const sched_engine = guc->sched_engine;
1508 if (__tasklet_is_enabled(&sched_engine->tasklet)) {
1509 GEM_BUG_ON(!guc->ct.enabled);
1510 __tasklet_disable_sync_once(&sched_engine->tasklet);
1511 sched_engine->tasklet.callback = NULL;
1515 static void enable_submission(struct intel_guc *guc)
1517 struct i915_sched_engine * const sched_engine = guc->sched_engine;
1518 unsigned long flags;
1520 spin_lock_irqsave(&guc->sched_engine->lock, flags);
1521 sched_engine->tasklet.callback = guc_submission_tasklet;
1522 wmb(); /* Make sure callback visible */
1523 if (!__tasklet_is_enabled(&sched_engine->tasklet) &&
1524 __tasklet_enable(&sched_engine->tasklet)) {
1525 GEM_BUG_ON(!guc->ct.enabled);
1527 /* And kick in case we missed a new request submission. */
1528 tasklet_hi_schedule(&sched_engine->tasklet);
1530 spin_unlock_irqrestore(&guc->sched_engine->lock, flags);
1533 static void guc_flush_submissions(struct intel_guc *guc)
1535 struct i915_sched_engine * const sched_engine = guc->sched_engine;
1536 unsigned long flags;
1538 spin_lock_irqsave(&sched_engine->lock, flags);
1539 spin_unlock_irqrestore(&sched_engine->lock, flags);
1542 static void guc_flush_destroyed_contexts(struct intel_guc *guc);
1544 void intel_guc_submission_reset_prepare(struct intel_guc *guc)
1546 if (unlikely(!guc_submission_initialized(guc))) {
1547 /* Reset called during driver load? GuC not yet initialised! */
1551 intel_gt_park_heartbeats(guc_to_gt(guc));
1552 disable_submission(guc);
1553 guc->interrupts.disable(guc);
1554 __reset_guc_busyness_stats(guc);
1556 /* Flush IRQ handler */
1557 spin_lock_irq(guc_to_gt(guc)->irq_lock);
1558 spin_unlock_irq(guc_to_gt(guc)->irq_lock);
1560 guc_flush_submissions(guc);
1561 guc_flush_destroyed_contexts(guc);
1562 flush_work(&guc->ct.requests.worker);
1564 scrub_guc_desc_for_outstanding_g2h(guc);
1567 static struct intel_engine_cs *
1568 guc_virtual_get_sibling(struct intel_engine_cs *ve, unsigned int sibling)
1570 struct intel_engine_cs *engine;
1571 intel_engine_mask_t tmp, mask = ve->mask;
1572 unsigned int num_siblings = 0;
1574 for_each_engine_masked(engine, ve->gt, mask, tmp)
1575 if (num_siblings++ == sibling)
1581 static inline struct intel_engine_cs *
1582 __context_to_physical_engine(struct intel_context *ce)
1584 struct intel_engine_cs *engine = ce->engine;
1586 if (intel_engine_is_virtual(engine))
1587 engine = guc_virtual_get_sibling(engine, 0);
1592 static void guc_reset_state(struct intel_context *ce, u32 head, bool scrub)
1594 struct intel_engine_cs *engine = __context_to_physical_engine(ce);
1596 if (!intel_context_is_schedulable(ce))
1599 GEM_BUG_ON(!intel_context_is_pinned(ce));
1602 * We want a simple context + ring to execute the breadcrumb update.
1603 * We cannot rely on the context being intact across the GPU hang,
1604 * so clear it and rebuild just what we need for the breadcrumb.
1605 * All pending requests for this context will be zapped, and any
1606 * future request will be after userspace has had the opportunity
1607 * to recreate its own state.
1610 lrc_init_regs(ce, engine, true);
1612 /* Rerun the request; its payload has been neutered (if guilty). */
1613 lrc_update_regs(ce, engine, head);
1616 static void guc_engine_reset_prepare(struct intel_engine_cs *engine)
1618 if (!IS_GRAPHICS_VER(engine->i915, 11, 12))
1621 intel_engine_stop_cs(engine);
1624 * Wa_22011802037:gen11/gen12: In addition to stopping the cs, we need
1625 * to wait for any pending mi force wakeups
1627 intel_engine_wait_for_pending_mi_fw(engine);
1630 static void guc_reset_nop(struct intel_engine_cs *engine)
1634 static void guc_rewind_nop(struct intel_engine_cs *engine, bool stalled)
1639 __unwind_incomplete_requests(struct intel_context *ce)
1641 struct i915_request *rq, *rn;
1642 struct list_head *pl;
1643 int prio = I915_PRIORITY_INVALID;
1644 struct i915_sched_engine * const sched_engine =
1645 ce->engine->sched_engine;
1646 unsigned long flags;
1648 spin_lock_irqsave(&sched_engine->lock, flags);
1649 spin_lock(&ce->guc_state.lock);
1650 list_for_each_entry_safe_reverse(rq, rn,
1651 &ce->guc_state.requests,
1653 if (i915_request_completed(rq))
1656 list_del_init(&rq->sched.link);
1657 __i915_request_unsubmit(rq);
1659 /* Push the request back into the queue for later resubmission. */
1660 GEM_BUG_ON(rq_prio(rq) == I915_PRIORITY_INVALID);
1661 if (rq_prio(rq) != prio) {
1663 pl = i915_sched_lookup_priolist(sched_engine, prio);
1665 GEM_BUG_ON(i915_sched_engine_is_empty(sched_engine));
1667 list_add(&rq->sched.link, pl);
1668 set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
1670 spin_unlock(&ce->guc_state.lock);
1671 spin_unlock_irqrestore(&sched_engine->lock, flags);
1674 static void __guc_reset_context(struct intel_context *ce, intel_engine_mask_t stalled)
1677 struct i915_request *rq;
1678 unsigned long flags;
1680 int i, number_children = ce->parallel.number_children;
1681 struct intel_context *parent = ce;
1683 GEM_BUG_ON(intel_context_is_child(ce));
1685 intel_context_get(ce);
1688 * GuC will implicitly mark the context as non-schedulable when it sends
1689 * the reset notification. Make sure our state reflects this change. The
1690 * context will be marked enabled on resubmission.
1692 spin_lock_irqsave(&ce->guc_state.lock, flags);
1693 clr_context_enabled(ce);
1694 spin_unlock_irqrestore(&ce->guc_state.lock, flags);
1697 * For each context in the relationship, find the hanging request and
1698 * reset each context / request as needed.
1700 for (i = 0; i < number_children + 1; ++i) {
1701 if (!intel_context_is_pinned(ce))
1705 rq = intel_context_find_active_request(ce);
1707 head = ce->ring->tail;
1711 if (i915_request_started(rq))
1712 guilty = stalled & ce->engine->mask;
1714 GEM_BUG_ON(i915_active_is_idle(&ce->active));
1715 head = intel_ring_wrap(ce->ring, rq->head);
1717 __i915_request_reset(rq, guilty);
1719 guc_reset_state(ce, head, guilty);
1721 if (i != number_children)
1722 ce = list_next_entry(ce, parallel.child_link);
1725 __unwind_incomplete_requests(parent);
1726 intel_context_put(parent);
1729 void intel_guc_submission_reset(struct intel_guc *guc, intel_engine_mask_t stalled)
1731 struct intel_context *ce;
1732 unsigned long index;
1733 unsigned long flags;
1735 if (unlikely(!guc_submission_initialized(guc))) {
1736 /* Reset called during driver load? GuC not yet initialised! */
1740 xa_lock_irqsave(&guc->context_lookup, flags);
1741 xa_for_each(&guc->context_lookup, index, ce) {
1742 if (!kref_get_unless_zero(&ce->ref))
1745 xa_unlock(&guc->context_lookup);
1747 if (intel_context_is_pinned(ce) &&
1748 !intel_context_is_child(ce))
1749 __guc_reset_context(ce, stalled);
1751 intel_context_put(ce);
1753 xa_lock(&guc->context_lookup);
1755 xa_unlock_irqrestore(&guc->context_lookup, flags);
1757 /* GuC is blown away, drop all references to contexts */
1758 xa_destroy(&guc->context_lookup);
1761 static void guc_cancel_context_requests(struct intel_context *ce)
1763 struct i915_sched_engine *sched_engine = ce_to_guc(ce)->sched_engine;
1764 struct i915_request *rq;
1765 unsigned long flags;
1767 /* Mark all executing requests as skipped. */
1768 spin_lock_irqsave(&sched_engine->lock, flags);
1769 spin_lock(&ce->guc_state.lock);
1770 list_for_each_entry(rq, &ce->guc_state.requests, sched.link)
1771 i915_request_put(i915_request_mark_eio(rq));
1772 spin_unlock(&ce->guc_state.lock);
1773 spin_unlock_irqrestore(&sched_engine->lock, flags);
1777 guc_cancel_sched_engine_requests(struct i915_sched_engine *sched_engine)
1779 struct i915_request *rq, *rn;
1781 unsigned long flags;
1783 /* Can be called during boot if GuC fails to load */
1788 * Before we call engine->cancel_requests(), we should have exclusive
1789 * access to the submission state. This is arranged for us by the
1790 * caller disabling the interrupt generation, the tasklet and other
1791 * threads that may then access the same state, giving us a free hand
1792 * to reset state. However, we still need to let lockdep be aware that
1793 * we know this state may be accessed in hardirq context, so we
1794 * disable the irq around this manipulation and we want to keep
1795 * the spinlock focused on its duties and not accidentally conflate
1796 * coverage to the submission's irq state. (Similarly, although we
1797 * shouldn't need to disable irq around the manipulation of the
1798 * submission's irq state, we also wish to remind ourselves that it is irq state.)
1801 spin_lock_irqsave(&sched_engine->lock, flags);
1803 /* Flush the queued requests to the timeline list (for retiring). */
1804 while ((rb = rb_first_cached(&sched_engine->queue))) {
1805 struct i915_priolist *p = to_priolist(rb);
1807 priolist_for_each_request_consume(rq, rn, p) {
1808 list_del_init(&rq->sched.link);
1810 __i915_request_submit(rq);
1812 i915_request_put(i915_request_mark_eio(rq));
1815 rb_erase_cached(&p->node, &sched_engine->queue);
1816 i915_priolist_free(p);
1819 /* Remaining _unready_ requests will be nop'ed when submitted */
1821 sched_engine->queue_priority_hint = INT_MIN;
1822 sched_engine->queue = RB_ROOT_CACHED;
1824 spin_unlock_irqrestore(&sched_engine->lock, flags);
1827 void intel_guc_submission_cancel_requests(struct intel_guc *guc)
1829 struct intel_context *ce;
1830 unsigned long index;
1831 unsigned long flags;
1833 xa_lock_irqsave(&guc->context_lookup, flags);
1834 xa_for_each(&guc->context_lookup, index, ce) {
1835 if (!kref_get_unless_zero(&ce->ref))
1838 xa_unlock(&guc->context_lookup);
1840 if (intel_context_is_pinned(ce) &&
1841 !intel_context_is_child(ce))
1842 guc_cancel_context_requests(ce);
1844 intel_context_put(ce);
1846 xa_lock(&guc->context_lookup);
1848 xa_unlock_irqrestore(&guc->context_lookup, flags);
1850 guc_cancel_sched_engine_requests(guc->sched_engine);
1852 /* GuC is blown away, drop all references to contexts */
1853 xa_destroy(&guc->context_lookup);
1856 void intel_guc_submission_reset_finish(struct intel_guc *guc)
1858 /* Reset called during driver load or during wedge? */
1859 if (unlikely(!guc_submission_initialized(guc) ||
1860 intel_gt_is_wedged(guc_to_gt(guc)))) {
1865 * Technically possible for either of these values to be non-zero here,
1866 * but very unlikely + harmless. Regardless, let's add a warn so we can
1867 * see in CI if this happens frequently / is a precursor to taking down the machine.
1870 GEM_WARN_ON(atomic_read(&guc->outstanding_submission_g2h));
1871 atomic_set(&guc->outstanding_submission_g2h, 0);
1873 intel_guc_global_policies_update(guc);
1874 enable_submission(guc);
1875 intel_gt_unpark_heartbeats(guc_to_gt(guc));
1878 static void destroyed_worker_func(struct work_struct *w);
1879 static void reset_fail_worker_func(struct work_struct *w);
1882 * Set up the memory resources to be shared with the GuC (via the GGTT)
1883 * at firmware loading time.
1885 int intel_guc_submission_init(struct intel_guc *guc)
1887 struct intel_gt *gt = guc_to_gt(guc);
1890 if (guc->submission_initialized)
1893 if (GET_UC_VER(guc) < MAKE_UC_VER(70, 0, 0)) {
1894 ret = guc_lrc_desc_pool_create_v69(guc);
1899 guc->submission_state.guc_ids_bitmap =
1900 bitmap_zalloc(NUMBER_MULTI_LRC_GUC_ID(guc), GFP_KERNEL);
1901 if (!guc->submission_state.guc_ids_bitmap) {
1906 guc->timestamp.ping_delay = (POLL_TIME_CLKS / gt->clock_frequency + 1) * HZ;
1907 guc->timestamp.shift = gpm_timestamp_shift(gt);
1908 guc->submission_initialized = true;
1913 guc_lrc_desc_pool_destroy_v69(guc);
1918 void intel_guc_submission_fini(struct intel_guc *guc)
1920 if (!guc->submission_initialized)
1923 guc_flush_destroyed_contexts(guc);
1924 guc_lrc_desc_pool_destroy_v69(guc);
1925 i915_sched_engine_put(guc->sched_engine);
1926 bitmap_free(guc->submission_state.guc_ids_bitmap);
1927 guc->submission_initialized = false;
1930 static inline void queue_request(struct i915_sched_engine *sched_engine,
1931 struct i915_request *rq,
1934 GEM_BUG_ON(!list_empty(&rq->sched.link));
1935 list_add_tail(&rq->sched.link,
1936 i915_sched_lookup_priolist(sched_engine, prio));
1937 set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
1938 tasklet_hi_schedule(&sched_engine->tasklet);
1941 static int guc_bypass_tasklet_submit(struct intel_guc *guc,
1942 struct i915_request *rq)
1946 __i915_request_submit(rq);
1948 trace_i915_request_in(rq, 0);
1950 if (is_multi_lrc_rq(rq)) {
1951 if (multi_lrc_submit(rq)) {
1952 ret = guc_wq_item_append(guc, rq);
1954 ret = guc_add_request(guc, rq);
1957 guc_set_lrc_tail(rq);
1958 ret = guc_add_request(guc, rq);
1961 if (unlikely(ret == -EPIPE))
1962 disable_submission(guc);
1967 static bool need_tasklet(struct intel_guc *guc, struct i915_request *rq)
1969 struct i915_sched_engine *sched_engine = rq->engine->sched_engine;
1970 struct intel_context *ce = request_to_scheduling_context(rq);
1972 return submission_disabled(guc) || guc->stalled_request ||
1973 !i915_sched_engine_is_empty(sched_engine) ||
1974 !ctx_id_mapped(guc, ce->guc_id.id);
1977 static void guc_submit_request(struct i915_request *rq)
1979 struct i915_sched_engine *sched_engine = rq->engine->sched_engine;
1980 struct intel_guc *guc = &rq->engine->gt->uc.guc;
1981 unsigned long flags;
1983 /* Will be called from irq-context when using foreign fences. */
1984 spin_lock_irqsave(&sched_engine->lock, flags);
1986 if (need_tasklet(guc, rq))
1987 queue_request(sched_engine, rq, rq_prio(rq));
1988 else if (guc_bypass_tasklet_submit(guc, rq) == -EBUSY)
1989 tasklet_hi_schedule(&sched_engine->tasklet);
1991 spin_unlock_irqrestore(&sched_engine->lock, flags);
1994 static int new_guc_id(struct intel_guc *guc, struct intel_context *ce)
1998 GEM_BUG_ON(intel_context_is_child(ce));
2000 if (intel_context_is_parent(ce))
2001 ret = bitmap_find_free_region(guc->submission_state.guc_ids_bitmap,
2002 NUMBER_MULTI_LRC_GUC_ID(guc),
2003 order_base_2(ce->parallel.number_children
2006 ret = ida_simple_get(&guc->submission_state.guc_ids,
2007 NUMBER_MULTI_LRC_GUC_ID(guc),
2008 guc->submission_state.num_guc_ids,
2009 GFP_KERNEL | __GFP_RETRY_MAYFAIL |
2011 if (unlikely(ret < 0))
2014 if (!intel_context_is_parent(ce))
2015 ++guc->submission_state.guc_ids_in_use;
2017 ce->guc_id.id = ret;
2021 static void __release_guc_id(struct intel_guc *guc, struct intel_context *ce)
2023 GEM_BUG_ON(intel_context_is_child(ce));
2025 if (!context_guc_id_invalid(ce)) {
2026 if (intel_context_is_parent(ce)) {
2027 bitmap_release_region(guc->submission_state.guc_ids_bitmap,
2029 order_base_2(ce->parallel.number_children
2032 --guc->submission_state.guc_ids_in_use;
2033 ida_simple_remove(&guc->submission_state.guc_ids,
2036 clr_ctx_id_mapping(guc, ce->guc_id.id);
2037 set_context_guc_id_invalid(ce);
2039 if (!list_empty(&ce->guc_id.link))
2040 list_del_init(&ce->guc_id.link);
2043 static void release_guc_id(struct intel_guc *guc, struct intel_context *ce)
2045 unsigned long flags;
2047 spin_lock_irqsave(&guc->submission_state.lock, flags);
2048 __release_guc_id(guc, ce);
2049 spin_unlock_irqrestore(&guc->submission_state.lock, flags);
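/*
 * If no free guc_id is available, one can be stolen from the oldest context
 * on submission_state.guc_id_list. That list only contains contexts whose
 * guc_id is currently unpinned (guc_id.ref == 0), so the victim is simply
 * unlinked, marked unregistered and has its guc_id invalidated; it will have
 * to be re-registered with the GuC before it can be submitted again.
 */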
2052 static int steal_guc_id(struct intel_guc *guc, struct intel_context *ce)
2054 struct intel_context *cn;
2056 lockdep_assert_held(&guc->submission_state.lock);
2057 GEM_BUG_ON(intel_context_is_child(ce));
2058 GEM_BUG_ON(intel_context_is_parent(ce));
2060 if (!list_empty(&guc->submission_state.guc_id_list)) {
2061 cn = list_first_entry(&guc->submission_state.guc_id_list,
2062 struct intel_context,
2065 GEM_BUG_ON(atomic_read(&cn->guc_id.ref));
2066 GEM_BUG_ON(context_guc_id_invalid(cn));
2067 GEM_BUG_ON(intel_context_is_child(cn));
2068 GEM_BUG_ON(intel_context_is_parent(cn));
2070 list_del_init(&cn->guc_id.link);
2071 ce->guc_id.id = cn->guc_id.id;
2073 spin_lock(&cn->guc_state.lock);
2074 clr_context_registered(cn);
2075 spin_unlock(&cn->guc_state.lock);
2077 set_context_guc_id_invalid(cn);
2079 #ifdef CONFIG_DRM_I915_SELFTEST
2080 guc->number_guc_id_stolen++;
2089 static int assign_guc_id(struct intel_guc *guc, struct intel_context *ce)
2093 lockdep_assert_held(&guc->submission_state.lock);
2094 GEM_BUG_ON(intel_context_is_child(ce));
2096 ret = new_guc_id(guc, ce);
2097 if (unlikely(ret < 0)) {
2098 if (intel_context_is_parent(ce))
2101 ret = steal_guc_id(guc, ce);
2106 if (intel_context_is_parent(ce)) {
2107 struct intel_context *child;
2110 for_each_child(ce, child)
2111 child->guc_id.id = ce->guc_id.id + i++;
2117 #define PIN_GUC_ID_TRIES 4
2118 static int pin_guc_id(struct intel_guc *guc, struct intel_context *ce)
2121 unsigned long flags, tries = PIN_GUC_ID_TRIES;
2123 GEM_BUG_ON(atomic_read(&ce->guc_id.ref));
2126 spin_lock_irqsave(&guc->submission_state.lock, flags);
2128 might_lock(&ce->guc_state.lock);
2130 if (context_guc_id_invalid(ce)) {
2131 ret = assign_guc_id(guc, ce);
2134 ret = 1; /* Indicates newly assigned guc_id */
2136 if (!list_empty(&ce->guc_id.link))
2137 list_del_init(&ce->guc_id.link);
2138 atomic_inc(&ce->guc_id.ref);
2141 spin_unlock_irqrestore(&guc->submission_state.lock, flags);
2144 * -EAGAIN indicates no guc_ids are available, let's retire any
2145 * outstanding requests to see if that frees up a guc_id. If the first
2146 * retire didn't help, insert a sleep with the timeslice duration before
2147 * attempting to retire more requests. Double the sleep period each
2148 * subsequent pass before finally giving up. The sleep period has a
2149 * maximum of 100ms and a minimum of 1ms.
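 *
 * Illustrative example (assuming, say, a 5 ms timeslice): the first retry
 * happens immediately after retiring outstanding requests, the second
 * sleeps ~5 ms and the third ~10 ms (doubling each pass, clamped to the
 * 1-100 ms range), after which -EAGAIN is returned to the caller.
 */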
2151 if (ret == -EAGAIN && --tries) {
2152 if (PIN_GUC_ID_TRIES - tries > 1) {
2153 unsigned int timeslice_shifted =
2154 ce->engine->props.timeslice_duration_ms <<
2155 (PIN_GUC_ID_TRIES - tries - 2);
2156 unsigned int max = min_t(unsigned int, 100,
2159 msleep(max_t(unsigned int, max, 1));
2161 intel_gt_retire_requests(guc_to_gt(guc));
2168 static void unpin_guc_id(struct intel_guc *guc, struct intel_context *ce)
2170 unsigned long flags;
2172 GEM_BUG_ON(atomic_read(&ce->guc_id.ref) < 0);
2173 GEM_BUG_ON(intel_context_is_child(ce));
2175 if (unlikely(context_guc_id_invalid(ce) ||
2176 intel_context_is_parent(ce)))
2179 spin_lock_irqsave(&guc->submission_state.lock, flags);
2180 if (!context_guc_id_invalid(ce) && list_empty(&ce->guc_id.link) &&
2181 !atomic_read(&ce->guc_id.ref))
2182 list_add_tail(&ce->guc_id.link,
2183 &guc->submission_state.guc_id_list);
2184 spin_unlock_irqrestore(&guc->submission_state.lock, flags);
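/*
 * The context registration H2G differs by firmware version: v69 firmware is
 * handed the guc_id plus GGTT offsets of pre-populated LRC descriptors (one
 * extra per child for multi-LRC), while v70+ firmware receives the
 * description inline as a struct guc_ctxt_registration_info (engine class,
 * submit mask, LRCA, work queue addresses, ...) with scheduling policies
 * sent separately as KLVs.
 */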
2187 static int __guc_action_register_multi_lrc_v69(struct intel_guc *guc,
2188 struct intel_context *ce,
2193 struct intel_context *child;
2194 u32 action[4 + MAX_ENGINE_INSTANCE];
2197 GEM_BUG_ON(ce->parallel.number_children > MAX_ENGINE_INSTANCE);
2199 action[len++] = INTEL_GUC_ACTION_REGISTER_CONTEXT_MULTI_LRC;
2200 action[len++] = guc_id;
2201 action[len++] = ce->parallel.number_children + 1;
2202 action[len++] = offset;
2203 for_each_child(ce, child) {
2204 offset += sizeof(struct guc_lrc_desc_v69);
2205 action[len++] = offset;
2208 return guc_submission_send_busy_loop(guc, action, len, 0, loop);
2211 static int __guc_action_register_multi_lrc_v70(struct intel_guc *guc,
2212 struct intel_context *ce,
2213 struct guc_ctxt_registration_info *info,
2216 struct intel_context *child;
2217 u32 action[13 + (MAX_ENGINE_INSTANCE * 2)];
2221 GEM_BUG_ON(ce->parallel.number_children > MAX_ENGINE_INSTANCE);
2223 action[len++] = INTEL_GUC_ACTION_REGISTER_CONTEXT_MULTI_LRC;
2224 action[len++] = info->flags;
2225 action[len++] = info->context_idx;
2226 action[len++] = info->engine_class;
2227 action[len++] = info->engine_submit_mask;
2228 action[len++] = info->wq_desc_lo;
2229 action[len++] = info->wq_desc_hi;
2230 action[len++] = info->wq_base_lo;
2231 action[len++] = info->wq_base_hi;
2232 action[len++] = info->wq_size;
2233 action[len++] = ce->parallel.number_children + 1;
2234 action[len++] = info->hwlrca_lo;
2235 action[len++] = info->hwlrca_hi;
2237 next_id = info->context_idx + 1;
2238 for_each_child(ce, child) {
2239 GEM_BUG_ON(next_id++ != child->guc_id.id);
2242 * NB: GuC interface supports 64 bit LRCA even though i915/HW
2243 * only supports 32 bit currently.
2245 action[len++] = lower_32_bits(child->lrc.lrca);
2246 action[len++] = upper_32_bits(child->lrc.lrca);
2249 GEM_BUG_ON(len > ARRAY_SIZE(action));
2251 return guc_submission_send_busy_loop(guc, action, len, 0, loop);
2254 static int __guc_action_register_context_v69(struct intel_guc *guc,
2260 INTEL_GUC_ACTION_REGISTER_CONTEXT,
2265 return guc_submission_send_busy_loop(guc, action, ARRAY_SIZE(action),
2269 static int __guc_action_register_context_v70(struct intel_guc *guc,
2270 struct guc_ctxt_registration_info *info,
2274 INTEL_GUC_ACTION_REGISTER_CONTEXT,
2278 info->engine_submit_mask,
2288 return guc_submission_send_busy_loop(guc, action, ARRAY_SIZE(action),
2292 static void prepare_context_registration_info_v69(struct intel_context *ce);
2293 static void prepare_context_registration_info_v70(struct intel_context *ce,
2294 struct guc_ctxt_registration_info *info);
2297 register_context_v69(struct intel_guc *guc, struct intel_context *ce, bool loop)
2299 u32 offset = intel_guc_ggtt_offset(guc, guc->lrc_desc_pool_v69) +
2300 ce->guc_id.id * sizeof(struct guc_lrc_desc_v69);
2302 prepare_context_registration_info_v69(ce);
2304 if (intel_context_is_parent(ce))
2305 return __guc_action_register_multi_lrc_v69(guc, ce, ce->guc_id.id,
2308 return __guc_action_register_context_v69(guc, ce->guc_id.id,
2313 register_context_v70(struct intel_guc *guc, struct intel_context *ce, bool loop)
2315 struct guc_ctxt_registration_info info;
2317 prepare_context_registration_info_v70(ce, &info);
2319 if (intel_context_is_parent(ce))
2320 return __guc_action_register_multi_lrc_v70(guc, ce, &info, loop);
2322 return __guc_action_register_context_v70(guc, &info, loop);
2325 static int register_context(struct intel_context *ce, bool loop)
2327 struct intel_guc *guc = ce_to_guc(ce);
2330 GEM_BUG_ON(intel_context_is_child(ce));
2331 trace_intel_context_register(ce);
2333 if (GET_UC_VER(guc) >= MAKE_UC_VER(70, 0, 0))
2334 ret = register_context_v70(guc, ce, loop);
2336 ret = register_context_v69(guc, ce, loop);
2339 unsigned long flags;
2341 spin_lock_irqsave(&ce->guc_state.lock, flags);
2342 set_context_registered(ce);
2343 spin_unlock_irqrestore(&ce->guc_state.lock, flags);
2345 if (GET_UC_VER(guc) >= MAKE_UC_VER(70, 0, 0))
2346 guc_context_policy_init_v70(ce, loop);
2352 static int __guc_action_deregister_context(struct intel_guc *guc,
2356 INTEL_GUC_ACTION_DEREGISTER_CONTEXT,
2360 return guc_submission_send_busy_loop(guc, action, ARRAY_SIZE(action),
2361 G2H_LEN_DW_DEREGISTER_CONTEXT,
2365 static int deregister_context(struct intel_context *ce, u32 guc_id)
2367 struct intel_guc *guc = ce_to_guc(ce);
2369 GEM_BUG_ON(intel_context_is_child(ce));
2370 trace_intel_context_deregister(ce);
2372 return __guc_action_deregister_context(guc, guc_id);
2375 static inline void clear_children_join_go_memory(struct intel_context *ce)
2377 struct parent_scratch *ps = __get_parent_scratch(ce);
2380 ps->go.semaphore = 0;
2381 for (i = 0; i < ce->parallel.number_children + 1; ++i)
2382 ps->join[i].semaphore = 0;
2385 static inline u32 get_children_go_value(struct intel_context *ce)
2387 return __get_parent_scratch(ce)->go.semaphore;
2390 static inline u32 get_children_join_value(struct intel_context *ce,
2393 return __get_parent_scratch(ce)->join[child_index].semaphore;
2396 struct context_policy {
2398 struct guc_update_context_policy h2g;
2401 static u32 __guc_context_policy_action_size(struct context_policy *policy)
2403 size_t bytes = sizeof(policy->h2g.header) +
2404 (sizeof(policy->h2g.klv[0]) * policy->count);
2406 return bytes / sizeof(u32);
2409 static void __guc_context_policy_start_klv(struct context_policy *policy, u16 guc_id)
2411 policy->h2g.header.action = INTEL_GUC_ACTION_HOST2GUC_UPDATE_CONTEXT_POLICIES;
2412 policy->h2g.header.ctx_id = guc_id;
2416 #define MAKE_CONTEXT_POLICY_ADD(func, id) \
2417 static void __guc_context_policy_add_##func(struct context_policy *policy, u32 data) \
2419 GEM_BUG_ON(policy->count >= GUC_CONTEXT_POLICIES_KLV_NUM_IDS); \
2420 policy->h2g.klv[policy->count].kl = \
2421 FIELD_PREP(GUC_KLV_0_KEY, GUC_CONTEXT_POLICIES_KLV_ID_##id) | \
2422 FIELD_PREP(GUC_KLV_0_LEN, 1); \
2423 policy->h2g.klv[policy->count].value = data; \
2427 MAKE_CONTEXT_POLICY_ADD(execution_quantum, EXECUTION_QUANTUM)
2428 MAKE_CONTEXT_POLICY_ADD(preemption_timeout, PREEMPTION_TIMEOUT)
2429 MAKE_CONTEXT_POLICY_ADD(priority, SCHEDULING_PRIORITY)
2430 MAKE_CONTEXT_POLICY_ADD(preempt_to_idle, PREEMPT_TO_IDLE_ON_QUANTUM_EXPIRY)
2432 #undef MAKE_CONTEXT_POLICY_ADD
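/*
 * The macro above expands to one __guc_context_policy_add_<policy>() helper
 * per KLV id; each call appends a single key/value pair to policy->h2g and
 * the accumulated buffer is sent to the GuC as one H2G below. The usual
 * sequence is __guc_context_policy_start_klv(), one or more add_<policy>()
 * calls, then __guc_context_set_context_policies(), as done in
 * guc_context_policy_init_v70().
 */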
2434 static int __guc_context_set_context_policies(struct intel_guc *guc,
2435 struct context_policy *policy,
2438 return guc_submission_send_busy_loop(guc, (u32 *)&policy->h2g,
2439 __guc_context_policy_action_size(policy),
2443 static int guc_context_policy_init_v70(struct intel_context *ce, bool loop)
2445 struct intel_engine_cs *engine = ce->engine;
2446 struct intel_guc *guc = &engine->gt->uc.guc;
2447 struct context_policy policy;
2448 u32 execution_quantum;
2449 u32 preemption_timeout;
2450 unsigned long flags;
2453 /* NB: For both of these, zero means disabled. */
2454 GEM_BUG_ON(overflows_type(engine->props.timeslice_duration_ms * 1000,
2455 execution_quantum));
2456 GEM_BUG_ON(overflows_type(engine->props.preempt_timeout_ms * 1000,
2457 preemption_timeout));
2458 execution_quantum = engine->props.timeslice_duration_ms * 1000;
2459 preemption_timeout = engine->props.preempt_timeout_ms * 1000;
2461 __guc_context_policy_start_klv(&policy, ce->guc_id.id);
2463 __guc_context_policy_add_priority(&policy, ce->guc_state.prio);
2464 __guc_context_policy_add_execution_quantum(&policy, execution_quantum);
2465 __guc_context_policy_add_preemption_timeout(&policy, preemption_timeout);
2467 if (engine->flags & I915_ENGINE_WANT_FORCED_PREEMPTION)
2468 __guc_context_policy_add_preempt_to_idle(&policy, 1);
2470 ret = __guc_context_set_context_policies(guc, &policy, loop);
2472 spin_lock_irqsave(&ce->guc_state.lock, flags);
2474 set_context_policy_required(ce);
2476 clr_context_policy_required(ce);
2477 spin_unlock_irqrestore(&ce->guc_state.lock, flags);
2482 static void guc_context_policy_init_v69(struct intel_engine_cs *engine,
2483 struct guc_lrc_desc_v69 *desc)
2485 desc->policy_flags = 0;
2487 if (engine->flags & I915_ENGINE_WANT_FORCED_PREEMPTION)
2488 desc->policy_flags |= CONTEXT_POLICY_FLAG_PREEMPT_TO_IDLE_V69;
2490 /* NB: For both of these, zero means disabled. */
2491 GEM_BUG_ON(overflows_type(engine->props.timeslice_duration_ms * 1000,
2492 desc->execution_quantum));
2493 GEM_BUG_ON(overflows_type(engine->props.preempt_timeout_ms * 1000,
2494 desc->preemption_timeout));
2495 desc->execution_quantum = engine->props.timeslice_duration_ms * 1000;
2496 desc->preemption_timeout = engine->props.preempt_timeout_ms * 1000;
2499 static u32 map_guc_prio_to_lrc_desc_prio(u8 prio)
2502 * this matches the mapping we do in map_i915_prio_to_guc_prio()
2503 * (e.g. prio < I915_PRIORITY_NORMAL maps to GUC_CLIENT_PRIORITY_NORMAL)
2509 case GUC_CLIENT_PRIORITY_KMD_NORMAL:
2510 return GEN12_CTX_PRIORITY_NORMAL;
2511 case GUC_CLIENT_PRIORITY_NORMAL:
2512 return GEN12_CTX_PRIORITY_LOW;
2513 case GUC_CLIENT_PRIORITY_HIGH:
2514 case GUC_CLIENT_PRIORITY_KMD_HIGH:
2515 return GEN12_CTX_PRIORITY_HIGH;
2519 static void prepare_context_registration_info_v69(struct intel_context *ce)
2521 struct intel_engine_cs *engine = ce->engine;
2522 struct intel_guc *guc = &engine->gt->uc.guc;
2523 u32 ctx_id = ce->guc_id.id;
2524 struct guc_lrc_desc_v69 *desc;
2525 struct intel_context *child;
2527 GEM_BUG_ON(!engine->mask);
2530 * Ensure the LRC + CT vmas are in the same region as the write barrier
2531 * is done based on the CT vma region.
2533 GEM_BUG_ON(i915_gem_object_is_lmem(guc->ct.vma->obj) !=
2534 i915_gem_object_is_lmem(ce->ring->vma->obj));
2536 desc = __get_lrc_desc_v69(guc, ctx_id);
2537 desc->engine_class = engine_class_to_guc_class(engine->class);
2538 desc->engine_submit_mask = engine->logical_mask;
2539 desc->hw_context_desc = ce->lrc.lrca;
2540 desc->priority = ce->guc_state.prio;
2541 desc->context_flags = CONTEXT_REGISTRATION_FLAG_KMD;
2542 guc_context_policy_init_v69(engine, desc);
2545 * If the context is a parent, we need to register a process descriptor
2546 * describing a work queue and register all child contexts.
2548 if (intel_context_is_parent(ce)) {
2549 struct guc_process_desc_v69 *pdesc;
2551 ce->parallel.guc.wqi_tail = 0;
2552 ce->parallel.guc.wqi_head = 0;
2554 desc->process_desc = i915_ggtt_offset(ce->state) +
2555 __get_parent_scratch_offset(ce);
2556 desc->wq_addr = i915_ggtt_offset(ce->state) +
2557 __get_wq_offset(ce);
2558 desc->wq_size = WQ_SIZE;
2560 pdesc = __get_process_desc_v69(ce);
2561 memset(pdesc, 0, sizeof(*(pdesc)));
2562 pdesc->stage_id = ce->guc_id.id;
2563 pdesc->wq_base_addr = desc->wq_addr;
2564 pdesc->wq_size_bytes = desc->wq_size;
2565 pdesc->wq_status = WQ_STATUS_ACTIVE;
2567 ce->parallel.guc.wq_head = &pdesc->head;
2568 ce->parallel.guc.wq_tail = &pdesc->tail;
2569 ce->parallel.guc.wq_status = &pdesc->wq_status;
2571 for_each_child(ce, child) {
2572 desc = __get_lrc_desc_v69(guc, child->guc_id.id);
2574 desc->engine_class =
2575 engine_class_to_guc_class(engine->class);
2576 desc->hw_context_desc = child->lrc.lrca;
2577 desc->priority = ce->guc_state.prio;
2578 desc->context_flags = CONTEXT_REGISTRATION_FLAG_KMD;
2579 guc_context_policy_init_v69(engine, desc);
2582 clear_children_join_go_memory(ce);
2586 static void prepare_context_registration_info_v70(struct intel_context *ce,
2587 struct guc_ctxt_registration_info *info)
2589 struct intel_engine_cs *engine = ce->engine;
2590 struct intel_guc *guc = &engine->gt->uc.guc;
2591 u32 ctx_id = ce->guc_id.id;
2593 GEM_BUG_ON(!engine->mask);
2596 * Ensure the LRC + CT vmas are in the same region as the write barrier
2597 * is done based on the CT vma region.
2599 GEM_BUG_ON(i915_gem_object_is_lmem(guc->ct.vma->obj) !=
2600 i915_gem_object_is_lmem(ce->ring->vma->obj));
2602 memset(info, 0, sizeof(*info));
2603 info->context_idx = ctx_id;
2604 info->engine_class = engine_class_to_guc_class(engine->class);
2605 info->engine_submit_mask = engine->logical_mask;
2607 * NB: GuC interface supports 64 bit LRCA even though i915/HW
2608 * only supports 32 bit currently.
2610 info->hwlrca_lo = lower_32_bits(ce->lrc.lrca);
2611 info->hwlrca_hi = upper_32_bits(ce->lrc.lrca);
2612 if (engine->flags & I915_ENGINE_HAS_EU_PRIORITY)
2613 info->hwlrca_lo |= map_guc_prio_to_lrc_desc_prio(ce->guc_state.prio);
2614 info->flags = CONTEXT_REGISTRATION_FLAG_KMD;
2617 * If the context is a parent, we need to register a process descriptor
2618 * describing a work queue and register all child contexts.
2620 if (intel_context_is_parent(ce)) {
2621 struct guc_sched_wq_desc *wq_desc;
2622 u64 wq_desc_offset, wq_base_offset;
2624 ce->parallel.guc.wqi_tail = 0;
2625 ce->parallel.guc.wqi_head = 0;
2627 wq_desc_offset = i915_ggtt_offset(ce->state) +
2628 __get_parent_scratch_offset(ce);
2629 wq_base_offset = i915_ggtt_offset(ce->state) +
2630 __get_wq_offset(ce);
2631 info->wq_desc_lo = lower_32_bits(wq_desc_offset);
2632 info->wq_desc_hi = upper_32_bits(wq_desc_offset);
2633 info->wq_base_lo = lower_32_bits(wq_base_offset);
2634 info->wq_base_hi = upper_32_bits(wq_base_offset);
2635 info->wq_size = WQ_SIZE;
2637 wq_desc = __get_wq_desc_v70(ce);
2638 memset(wq_desc, 0, sizeof(*wq_desc));
2639 wq_desc->wq_status = WQ_STATUS_ACTIVE;
2641 ce->parallel.guc.wq_head = &wq_desc->head;
2642 ce->parallel.guc.wq_tail = &wq_desc->tail;
2643 ce->parallel.guc.wq_status = &wq_desc->wq_status;
2645 clear_children_join_go_memory(ce);
2649 static int try_context_registration(struct intel_context *ce, bool loop)
2651 struct intel_engine_cs *engine = ce->engine;
2652 struct intel_runtime_pm *runtime_pm = engine->uncore->rpm;
2653 struct intel_guc *guc = &engine->gt->uc.guc;
2654 intel_wakeref_t wakeref;
2655 u32 ctx_id = ce->guc_id.id;
2656 bool context_registered;
2659 GEM_BUG_ON(!sched_state_is_init(ce));
2661 context_registered = ctx_id_mapped(guc, ctx_id);
2663 clr_ctx_id_mapping(guc, ctx_id);
2664 set_ctx_id_mapping(guc, ctx_id, ce);
2667 * The context_lookup xarray is used to determine if the hardware
2668 * context is currently registered. There are two cases in which it
2669 * could be registered: either the guc_id has been stolen from another
2670 * context or the lrc descriptor address of this context has changed. In
2671 * either case the context needs to be deregistered with the GuC before
2672 * registering this context.
2674 if (context_registered) {
2676 unsigned long flags;
2678 trace_intel_context_steal_guc_id(ce);
2681 /* Seal race with Reset */
2682 spin_lock_irqsave(&ce->guc_state.lock, flags);
2683 disabled = submission_disabled(guc);
2684 if (likely(!disabled)) {
2685 set_context_wait_for_deregister_to_register(ce);
2686 intel_context_get(ce);
2688 spin_unlock_irqrestore(&ce->guc_state.lock, flags);
2689 if (unlikely(disabled)) {
2690 clr_ctx_id_mapping(guc, ctx_id);
2691 return 0; /* Will get registered later */
2695 * If stealing the guc_id, this ce has the same guc_id as the
2696 * context whose guc_id was stolen.
2698 with_intel_runtime_pm(runtime_pm, wakeref)
2699 ret = deregister_context(ce, ce->guc_id.id);
2700 if (unlikely(ret == -ENODEV))
2701 ret = 0; /* Will get registered later */
2703 with_intel_runtime_pm(runtime_pm, wakeref)
2704 ret = register_context(ce, loop);
2705 if (unlikely(ret == -EBUSY)) {
2706 clr_ctx_id_mapping(guc, ctx_id);
2707 } else if (unlikely(ret == -ENODEV)) {
2708 clr_ctx_id_mapping(guc, ctx_id);
2709 ret = 0; /* Will get registered later */
2716 static int __guc_context_pre_pin(struct intel_context *ce,
2717 struct intel_engine_cs *engine,
2718 struct i915_gem_ww_ctx *ww,
2721 return lrc_pre_pin(ce, engine, ww, vaddr);
2724 static int __guc_context_pin(struct intel_context *ce,
2725 struct intel_engine_cs *engine,
2728 if (i915_ggtt_offset(ce->state) !=
2729 (ce->lrc.lrca & CTX_GTT_ADDRESS_MASK))
2730 set_bit(CONTEXT_LRCA_DIRTY, &ce->flags);
2733 * GuC context gets pinned in guc_request_alloc. See that function for
2734 * explanation of why.
2737 return lrc_pin(ce, engine, vaddr);
2740 static int guc_context_pre_pin(struct intel_context *ce,
2741 struct i915_gem_ww_ctx *ww,
2744 return __guc_context_pre_pin(ce, ce->engine, ww, vaddr);
2747 static int guc_context_pin(struct intel_context *ce, void *vaddr)
2749 int ret = __guc_context_pin(ce, ce->engine, vaddr);
2751 if (likely(!ret && !intel_context_is_barrier(ce)))
2752 intel_engine_pm_get(ce->engine);
2757 static void guc_context_unpin(struct intel_context *ce)
2759 struct intel_guc *guc = ce_to_guc(ce);
2761 unpin_guc_id(guc, ce);
2764 if (likely(!intel_context_is_barrier(ce)))
2765 intel_engine_pm_put_async(ce->engine);
2768 static void guc_context_post_unpin(struct intel_context *ce)
2773 static void __guc_context_sched_enable(struct intel_guc *guc,
2774 struct intel_context *ce)
2777 INTEL_GUC_ACTION_SCHED_CONTEXT_MODE_SET,
2782 trace_intel_context_sched_enable(ce);
2784 guc_submission_send_busy_loop(guc, action, ARRAY_SIZE(action),
2785 G2H_LEN_DW_SCHED_CONTEXT_MODE_SET, true);
2788 static void __guc_context_sched_disable(struct intel_guc *guc,
2789 struct intel_context *ce,
2793 INTEL_GUC_ACTION_SCHED_CONTEXT_MODE_SET,
2794 guc_id, /* ce->guc_id.id not stable */
2798 GEM_BUG_ON(guc_id == GUC_INVALID_CONTEXT_ID);
2800 GEM_BUG_ON(intel_context_is_child(ce));
2801 trace_intel_context_sched_disable(ce);
2803 guc_submission_send_busy_loop(guc, action, ARRAY_SIZE(action),
2804 G2H_LEN_DW_SCHED_CONTEXT_MODE_SET, true);
2807 static void guc_blocked_fence_complete(struct intel_context *ce)
2809 lockdep_assert_held(&ce->guc_state.lock);
2811 if (!i915_sw_fence_done(&ce->guc_state.blocked))
2812 i915_sw_fence_complete(&ce->guc_state.blocked);
2815 static void guc_blocked_fence_reinit(struct intel_context *ce)
2817 lockdep_assert_held(&ce->guc_state.lock);
2818 GEM_BUG_ON(!i915_sw_fence_done(&ce->guc_state.blocked));
2821 * This fence is always complete unless a pending schedule disable is
2822 * outstanding. We arm the fence here and complete it when we receive
2823 * the pending schedule disable complete message.
2825 i915_sw_fence_fini(&ce->guc_state.blocked);
2826 i915_sw_fence_reinit(&ce->guc_state.blocked);
2827 i915_sw_fence_await(&ce->guc_state.blocked);
2828 i915_sw_fence_commit(&ce->guc_state.blocked);
2831 static u16 prep_context_pending_disable(struct intel_context *ce)
2833 lockdep_assert_held(&ce->guc_state.lock);
2835 set_context_pending_disable(ce);
2836 clr_context_enabled(ce);
2837 guc_blocked_fence_reinit(ce);
2838 intel_context_get(ce);
2840 return ce->guc_id.id;
2843 static struct i915_sw_fence *guc_context_block(struct intel_context *ce)
2845 struct intel_guc *guc = ce_to_guc(ce);
2846 unsigned long flags;
2847 struct intel_runtime_pm *runtime_pm = ce->engine->uncore->rpm;
2848 intel_wakeref_t wakeref;
2852 GEM_BUG_ON(intel_context_is_child(ce));
2854 spin_lock_irqsave(&ce->guc_state.lock, flags);
2856 incr_context_blocked(ce);
2858 enabled = context_enabled(ce);
2859 if (unlikely(!enabled || submission_disabled(guc))) {
2861 clr_context_enabled(ce);
2862 spin_unlock_irqrestore(&ce->guc_state.lock, flags);
2863 return &ce->guc_state.blocked;
2867 * We add +2 here as the schedule disable complete CTB handler calls
2868 * intel_context_sched_disable_unpin (-2 to pin_count).
2870 atomic_add(2, &ce->pin_count);
2872 guc_id = prep_context_pending_disable(ce);
2874 spin_unlock_irqrestore(&ce->guc_state.lock, flags);
2876 with_intel_runtime_pm(runtime_pm, wakeref)
2877 __guc_context_sched_disable(guc, ce, guc_id);
2879 return &ce->guc_state.blocked;
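/*
 * Blocking is reference counted in the sched_state (incr/decr_context_blocked),
 * so a context only becomes unblockable again once every block has been
 * undone. The multi-blocked mask below, together with context_cant_unblock(),
 * keeps guc_context_unblock() from re-enabling scheduling while other blocks
 * or a pending schedule disable are still outstanding.
 */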
2882 #define SCHED_STATE_MULTI_BLOCKED_MASK \
2883 (SCHED_STATE_BLOCKED_MASK & ~SCHED_STATE_BLOCKED)
2884 #define SCHED_STATE_NO_UNBLOCK \
2885 (SCHED_STATE_MULTI_BLOCKED_MASK | \
2886 SCHED_STATE_PENDING_DISABLE | \
2889 static bool context_cant_unblock(struct intel_context *ce)
2891 lockdep_assert_held(&ce->guc_state.lock);
2893 return (ce->guc_state.sched_state & SCHED_STATE_NO_UNBLOCK) ||
2894 context_guc_id_invalid(ce) ||
2895 !ctx_id_mapped(ce_to_guc(ce), ce->guc_id.id) ||
2896 !intel_context_is_pinned(ce);
2899 static void guc_context_unblock(struct intel_context *ce)
2901 struct intel_guc *guc = ce_to_guc(ce);
2902 unsigned long flags;
2903 struct intel_runtime_pm *runtime_pm = ce->engine->uncore->rpm;
2904 intel_wakeref_t wakeref;
2907 GEM_BUG_ON(context_enabled(ce));
2908 GEM_BUG_ON(intel_context_is_child(ce));
2910 spin_lock_irqsave(&ce->guc_state.lock, flags);
2912 if (unlikely(submission_disabled(guc) ||
2913 context_cant_unblock(ce))) {
2917 set_context_pending_enable(ce);
2918 set_context_enabled(ce);
2919 intel_context_get(ce);
2922 decr_context_blocked(ce);
2924 spin_unlock_irqrestore(&ce->guc_state.lock, flags);
2927 with_intel_runtime_pm(runtime_pm, wakeref)
2928 __guc_context_sched_enable(guc, ce);
2932 static void guc_context_cancel_request(struct intel_context *ce,
2933 struct i915_request *rq)
2935 struct intel_context *block_context =
2936 request_to_scheduling_context(rq);
2938 if (i915_sw_fence_signaled(&rq->submit)) {
2939 struct i915_sw_fence *fence;
2941 intel_context_get(ce);
2942 fence = guc_context_block(block_context);
2943 i915_sw_fence_wait(fence);
2944 if (!i915_request_completed(rq)) {
2945 __i915_request_skip(rq);
2946 guc_reset_state(ce, intel_ring_wrap(ce->ring, rq->head),
2950 guc_context_unblock(block_context);
2951 intel_context_put(ce);
2955 static void __guc_context_set_preemption_timeout(struct intel_guc *guc,
2957 u32 preemption_timeout)
2959 if (GET_UC_VER(guc) >= MAKE_UC_VER(70, 0, 0)) {
2960 struct context_policy policy;
2962 __guc_context_policy_start_klv(&policy, guc_id);
2963 __guc_context_policy_add_preemption_timeout(&policy, preemption_timeout);
2964 __guc_context_set_context_policies(guc, &policy, true);
2967 INTEL_GUC_ACTION_V69_SET_CONTEXT_PREEMPTION_TIMEOUT,
2972 intel_guc_send_busy_loop(guc, action, ARRAY_SIZE(action), 0, true);
2977 guc_context_revoke(struct intel_context *ce, struct i915_request *rq,
2978 unsigned int preempt_timeout_ms)
2980 struct intel_guc *guc = ce_to_guc(ce);
2981 struct intel_runtime_pm *runtime_pm =
2982 &ce->engine->gt->i915->runtime_pm;
2983 intel_wakeref_t wakeref;
2984 unsigned long flags;
2986 GEM_BUG_ON(intel_context_is_child(ce));
2988 guc_flush_submissions(guc);
2990 spin_lock_irqsave(&ce->guc_state.lock, flags);
2991 set_context_banned(ce);
2993 if (submission_disabled(guc) ||
2994 (!context_enabled(ce) && !context_pending_disable(ce))) {
2995 spin_unlock_irqrestore(&ce->guc_state.lock, flags);
2997 guc_cancel_context_requests(ce);
2998 intel_engine_signal_breadcrumbs(ce->engine);
2999 } else if (!context_pending_disable(ce)) {
3003 * We add +2 here as the schedule disable complete CTB handler
3004 * calls intel_context_sched_disable_unpin (-2 to pin_count).
3006 atomic_add(2, &ce->pin_count);
3008 guc_id = prep_context_pending_disable(ce);
3009 spin_unlock_irqrestore(&ce->guc_state.lock, flags);
3012 * In addition to disabling scheduling, set the preemption
3013 * timeout to the minimum value (1 us) so the banned context
3014 * gets kicked off the HW ASAP.
3016 with_intel_runtime_pm(runtime_pm, wakeref) {
3017 __guc_context_set_preemption_timeout(guc, guc_id,
3018 preempt_timeout_ms);
3019 __guc_context_sched_disable(guc, ce, guc_id);
3022 if (!context_guc_id_invalid(ce))
3023 with_intel_runtime_pm(runtime_pm, wakeref)
3024 __guc_context_set_preemption_timeout(guc,
3026 preempt_timeout_ms);
3027 spin_unlock_irqrestore(&ce->guc_state.lock, flags);
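/*
 * Schedule-disable plumbing: guc_context_sched_disable() either issues the
 * disable H2G immediately via do_sched_disable() or, for contexts that are
 * still open and not under guc_id pressure, defers it through
 * sched_disable_delay_work (__delay_sched_disable()). bypass_sched_disable()
 * skips the H2G entirely when submission is disabled or the GuC no longer
 * has the context registered, in which case the pin is dropped directly.
 */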
3031 static void do_sched_disable(struct intel_guc *guc, struct intel_context *ce,
3032 unsigned long flags)
3033 __releases(ce->guc_state.lock)
3035 struct intel_runtime_pm *runtime_pm = &ce->engine->gt->i915->runtime_pm;
3036 intel_wakeref_t wakeref;
3039 lockdep_assert_held(&ce->guc_state.lock);
3040 guc_id = prep_context_pending_disable(ce);
3042 spin_unlock_irqrestore(&ce->guc_state.lock, flags);
3044 with_intel_runtime_pm(runtime_pm, wakeref)
3045 __guc_context_sched_disable(guc, ce, guc_id);
3048 static bool bypass_sched_disable(struct intel_guc *guc,
3049 struct intel_context *ce)
3051 lockdep_assert_held(&ce->guc_state.lock);
3052 GEM_BUG_ON(intel_context_is_child(ce));
3054 if (submission_disabled(guc) || context_guc_id_invalid(ce) ||
3055 !ctx_id_mapped(guc, ce->guc_id.id)) {
3056 clr_context_enabled(ce);
3060 return !context_enabled(ce);
3063 static void __delay_sched_disable(struct work_struct *wrk)
3065 struct intel_context *ce =
3066 container_of(wrk, typeof(*ce), guc_state.sched_disable_delay_work.work);
3067 struct intel_guc *guc = ce_to_guc(ce);
3068 unsigned long flags;
3070 spin_lock_irqsave(&ce->guc_state.lock, flags);
3072 if (bypass_sched_disable(guc, ce)) {
3073 spin_unlock_irqrestore(&ce->guc_state.lock, flags);
3074 intel_context_sched_disable_unpin(ce);
3076 do_sched_disable(guc, ce, flags);
3080 static bool guc_id_pressure(struct intel_guc *guc, struct intel_context *ce)
3083 * Parent contexts are perma-pinned; if we are unpinning, do the schedule
3084 * disable immediately.
3086 if (intel_context_is_parent(ce))
3090 * If we are beyond the threshold for available guc_ids, do the schedule disable immediately.
3092 return guc->submission_state.guc_ids_in_use >
3093 guc->submission_state.sched_disable_gucid_threshold;
3096 static void guc_context_sched_disable(struct intel_context *ce)
3098 struct intel_guc *guc = ce_to_guc(ce);
3099 u64 delay = guc->submission_state.sched_disable_delay_ms;
3100 unsigned long flags;
3102 spin_lock_irqsave(&ce->guc_state.lock, flags);
3104 if (bypass_sched_disable(guc, ce)) {
3105 spin_unlock_irqrestore(&ce->guc_state.lock, flags);
3106 intel_context_sched_disable_unpin(ce);
3107 } else if (!intel_context_is_closed(ce) && !guc_id_pressure(guc, ce) &&
3109 spin_unlock_irqrestore(&ce->guc_state.lock, flags);
3110 mod_delayed_work(system_unbound_wq,
3111 &ce->guc_state.sched_disable_delay_work,
3112 msecs_to_jiffies(delay));
3114 do_sched_disable(guc, ce, flags);
3118 static void guc_context_close(struct intel_context *ce)
3120 unsigned long flags;
3122 if (test_bit(CONTEXT_GUC_INIT, &ce->flags) &&
3123 cancel_delayed_work(&ce->guc_state.sched_disable_delay_work))
3124 __delay_sched_disable(&ce->guc_state.sched_disable_delay_work.work);
3126 spin_lock_irqsave(&ce->guc_state.lock, flags);
3127 set_context_close_done(ce);
3128 spin_unlock_irqrestore(&ce->guc_state.lock, flags);
3131 static inline void guc_lrc_desc_unpin(struct intel_context *ce)
3133 struct intel_guc *guc = ce_to_guc(ce);
3134 struct intel_gt *gt = guc_to_gt(guc);
3135 unsigned long flags;
3138 GEM_BUG_ON(!intel_gt_pm_is_awake(gt));
3139 GEM_BUG_ON(!ctx_id_mapped(guc, ce->guc_id.id));
3140 GEM_BUG_ON(ce != __get_context(guc, ce->guc_id.id));
3141 GEM_BUG_ON(context_enabled(ce));
3143 /* Seal race with Reset */
3144 spin_lock_irqsave(&ce->guc_state.lock, flags);
3145 disabled = submission_disabled(guc);
3146 if (likely(!disabled)) {
3147 __intel_gt_pm_get(gt);
3148 set_context_destroyed(ce);
3149 clr_context_registered(ce);
3151 spin_unlock_irqrestore(&ce->guc_state.lock, flags);
3152 if (unlikely(disabled)) {
3153 release_guc_id(guc, ce);
3154 __guc_context_destroy(ce);
3158 deregister_context(ce, ce->guc_id.id);
3161 static void __guc_context_destroy(struct intel_context *ce)
3163 GEM_BUG_ON(ce->guc_state.prio_count[GUC_CLIENT_PRIORITY_KMD_HIGH] ||
3164 ce->guc_state.prio_count[GUC_CLIENT_PRIORITY_HIGH] ||
3165 ce->guc_state.prio_count[GUC_CLIENT_PRIORITY_KMD_NORMAL] ||
3166 ce->guc_state.prio_count[GUC_CLIENT_PRIORITY_NORMAL]);
3169 intel_context_fini(ce);
3171 if (intel_engine_is_virtual(ce->engine)) {
3172 struct guc_virtual_engine *ve =
3173 container_of(ce, typeof(*ve), context);
3175 if (ve->base.breadcrumbs)
3176 intel_breadcrumbs_put(ve->base.breadcrumbs);
3180 intel_context_free(ce);
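/*
 * Destroyed contexts are handled on two paths: guc_flush_destroyed_contexts()
 * frees everything immediately and is only used while submission is disabled
 * or being torn down, whereas deregister_destroyed_contexts() (run from
 * destroyed_worker_func() with a GT PM reference held) sends a deregister
 * H2G per context and leaves the final free to the deregister-done G2H
 * handler.
 */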
3184 static void guc_flush_destroyed_contexts(struct intel_guc *guc)
3186 struct intel_context *ce;
3187 unsigned long flags;
3189 GEM_BUG_ON(!submission_disabled(guc) &&
3190 guc_submission_initialized(guc));
3192 while (!list_empty(&guc->submission_state.destroyed_contexts)) {
3193 spin_lock_irqsave(&guc->submission_state.lock, flags);
3194 ce = list_first_entry_or_null(&guc->submission_state.destroyed_contexts,
3195 struct intel_context,
3198 list_del_init(&ce->destroyed_link);
3199 spin_unlock_irqrestore(&guc->submission_state.lock, flags);
3204 release_guc_id(guc, ce);
3205 __guc_context_destroy(ce);
3209 static void deregister_destroyed_contexts(struct intel_guc *guc)
3211 struct intel_context *ce;
3212 unsigned long flags;
3214 while (!list_empty(&guc->submission_state.destroyed_contexts)) {
3215 spin_lock_irqsave(&guc->submission_state.lock, flags);
3216 ce = list_first_entry_or_null(&guc->submission_state.destroyed_contexts,
3217 struct intel_context,
3220 list_del_init(&ce->destroyed_link);
3221 spin_unlock_irqrestore(&guc->submission_state.lock, flags);
3226 guc_lrc_desc_unpin(ce);
3230 static void destroyed_worker_func(struct work_struct *w)
3232 struct intel_guc *guc = container_of(w, struct intel_guc,
3233 submission_state.destroyed_worker);
3234 struct intel_gt *gt = guc_to_gt(guc);
3237 with_intel_gt_pm(gt, tmp)
3238 deregister_destroyed_contexts(guc);
3241 static void guc_context_destroy(struct kref *kref)
3243 struct intel_context *ce = container_of(kref, typeof(*ce), ref);
3244 struct intel_guc *guc = ce_to_guc(ce);
3245 unsigned long flags;
3249 * If the guc_id is invalid this context has been stolen and we can free
3250 * it immediately. It can also be freed immediately if the context is not
3251 * registered with the GuC or the GuC is in the middle of a reset.
3253 spin_lock_irqsave(&guc->submission_state.lock, flags);
3254 destroy = submission_disabled(guc) || context_guc_id_invalid(ce) ||
3255 !ctx_id_mapped(guc, ce->guc_id.id);
3256 if (likely(!destroy)) {
3257 if (!list_empty(&ce->guc_id.link))
3258 list_del_init(&ce->guc_id.link);
3259 list_add_tail(&ce->destroyed_link,
3260 &guc->submission_state.destroyed_contexts);
3262 __release_guc_id(guc, ce);
3264 spin_unlock_irqrestore(&guc->submission_state.lock, flags);
3265 if (unlikely(destroy)) {
3266 __guc_context_destroy(ce);
3271 * We use a worker to issue the H2G to deregister the context as we can
3272 * take the GT PM for the first time which isn't allowed from an atomic
3275 queue_work(system_unbound_wq, &guc->submission_state.destroyed_worker);
3278 static int guc_context_alloc(struct intel_context *ce)
3280 return lrc_alloc(ce, ce->engine);
3283 static void __guc_context_set_prio(struct intel_guc *guc,
3284 struct intel_context *ce)
3286 if (GET_UC_VER(guc) >= MAKE_UC_VER(70, 0, 0)) {
3287 struct context_policy policy;
3289 __guc_context_policy_start_klv(&policy, ce->guc_id.id);
3290 __guc_context_policy_add_priority(&policy, ce->guc_state.prio);
3291 __guc_context_set_context_policies(guc, &policy, true);
3294 INTEL_GUC_ACTION_V69_SET_CONTEXT_PRIORITY,
3299 guc_submission_send_busy_loop(guc, action, ARRAY_SIZE(action), 0, true);
3303 static void guc_context_set_prio(struct intel_guc *guc,
3304 struct intel_context *ce,
3307 GEM_BUG_ON(prio < GUC_CLIENT_PRIORITY_KMD_HIGH ||
3308 prio > GUC_CLIENT_PRIORITY_NORMAL);
3309 lockdep_assert_held(&ce->guc_state.lock);
3311 if (ce->guc_state.prio == prio || submission_disabled(guc) ||
3312 !context_registered(ce)) {
3313 ce->guc_state.prio = prio;
3317 ce->guc_state.prio = prio;
3318 __guc_context_set_prio(guc, ce);
3320 trace_intel_context_set_prio(ce);
3323 static inline u8 map_i915_prio_to_guc_prio(int prio)
3325 if (prio == I915_PRIORITY_NORMAL)
3326 return GUC_CLIENT_PRIORITY_KMD_NORMAL;
3327 else if (prio < I915_PRIORITY_NORMAL)
3328 return GUC_CLIENT_PRIORITY_NORMAL;
3329 else if (prio < I915_PRIORITY_DISPLAY)
3330 return GUC_CLIENT_PRIORITY_HIGH;
3332 return GUC_CLIENT_PRIORITY_KMD_HIGH;
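/*
 * A context's GuC priority follows its highest-priority inflight request:
 * prio_count[] counts inflight requests per GuC priority level and
 * update_context_prio() picks the first non-empty bucket (lower value means
 * higher priority) and pushes it to the GuC whenever it changes.
 */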
3335 static inline void add_context_inflight_prio(struct intel_context *ce,
3338 lockdep_assert_held(&ce->guc_state.lock);
3339 GEM_BUG_ON(guc_prio >= ARRAY_SIZE(ce->guc_state.prio_count));
3341 ++ce->guc_state.prio_count[guc_prio];
3343 /* Overflow protection */
3344 GEM_WARN_ON(!ce->guc_state.prio_count[guc_prio]);
3347 static inline void sub_context_inflight_prio(struct intel_context *ce,
3350 lockdep_assert_held(&ce->guc_state.lock);
3351 GEM_BUG_ON(guc_prio >= ARRAY_SIZE(ce->guc_state.prio_count));
3353 /* Underflow protection */
3354 GEM_WARN_ON(!ce->guc_state.prio_count[guc_prio]);
3356 --ce->guc_state.prio_count[guc_prio];
3359 static inline void update_context_prio(struct intel_context *ce)
3361 struct intel_guc *guc = &ce->engine->gt->uc.guc;
3364 BUILD_BUG_ON(GUC_CLIENT_PRIORITY_KMD_HIGH != 0);
3365 BUILD_BUG_ON(GUC_CLIENT_PRIORITY_KMD_HIGH > GUC_CLIENT_PRIORITY_NORMAL);
3367 lockdep_assert_held(&ce->guc_state.lock);
3369 for (i = 0; i < ARRAY_SIZE(ce->guc_state.prio_count); ++i) {
3370 if (ce->guc_state.prio_count[i]) {
3371 guc_context_set_prio(guc, ce, i);
3377 static inline bool new_guc_prio_higher(u8 old_guc_prio, u8 new_guc_prio)
3379 /* Lower value is higher priority */
3380 return new_guc_prio < old_guc_prio;
3383 static void add_to_context(struct i915_request *rq)
3385 struct intel_context *ce = request_to_scheduling_context(rq);
3386 u8 new_guc_prio = map_i915_prio_to_guc_prio(rq_prio(rq));
3388 GEM_BUG_ON(intel_context_is_child(ce));
3389 GEM_BUG_ON(rq->guc_prio == GUC_PRIO_FINI);
3391 spin_lock(&ce->guc_state.lock);
3392 list_move_tail(&rq->sched.link, &ce->guc_state.requests);
3394 if (rq->guc_prio == GUC_PRIO_INIT) {
3395 rq->guc_prio = new_guc_prio;
3396 add_context_inflight_prio(ce, rq->guc_prio);
3397 } else if (new_guc_prio_higher(rq->guc_prio, new_guc_prio)) {
3398 sub_context_inflight_prio(ce, rq->guc_prio);
3399 rq->guc_prio = new_guc_prio;
3400 add_context_inflight_prio(ce, rq->guc_prio);
3402 update_context_prio(ce);
3404 spin_unlock(&ce->guc_state.lock);
3407 static void guc_prio_fini(struct i915_request *rq, struct intel_context *ce)
3409 lockdep_assert_held(&ce->guc_state.lock);
3411 if (rq->guc_prio != GUC_PRIO_INIT &&
3412 rq->guc_prio != GUC_PRIO_FINI) {
3413 sub_context_inflight_prio(ce, rq->guc_prio);
3414 update_context_prio(ce);
3416 rq->guc_prio = GUC_PRIO_FINI;
3419 static void remove_from_context(struct i915_request *rq)
3421 struct intel_context *ce = request_to_scheduling_context(rq);
3423 GEM_BUG_ON(intel_context_is_child(ce));
3425 spin_lock_irq(&ce->guc_state.lock);
3427 list_del_init(&rq->sched.link);
3428 clear_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
3430 /* Prevent further __await_execution() registering a cb, then flush */
3431 set_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags);
3433 guc_prio_fini(rq, ce);
3435 spin_unlock_irq(&ce->guc_state.lock);
3437 atomic_dec(&ce->guc_id.ref);
3438 i915_request_notify_execute_cb_imm(rq);
3441 static const struct intel_context_ops guc_context_ops = {
3442 .alloc = guc_context_alloc,
3444 .close = guc_context_close,
3446 .pre_pin = guc_context_pre_pin,
3447 .pin = guc_context_pin,
3448 .unpin = guc_context_unpin,
3449 .post_unpin = guc_context_post_unpin,
3451 .revoke = guc_context_revoke,
3453 .cancel_request = guc_context_cancel_request,
3455 .enter = intel_context_enter_engine,
3456 .exit = intel_context_exit_engine,
3458 .sched_disable = guc_context_sched_disable,
3461 .destroy = guc_context_destroy,
3463 .create_virtual = guc_create_virtual,
3464 .create_parallel = guc_create_parallel,
3467 static void submit_work_cb(struct irq_work *wrk)
3469 struct i915_request *rq = container_of(wrk, typeof(*rq), submit_work);
3471 might_lock(&rq->engine->sched_engine->lock);
3472 i915_sw_fence_complete(&rq->submit);
3475 static void __guc_signal_context_fence(struct intel_context *ce)
3477 struct i915_request *rq, *rn;
3479 lockdep_assert_held(&ce->guc_state.lock);
3481 if (!list_empty(&ce->guc_state.fences))
3482 trace_intel_context_fence_release(ce);
3485 * Use an IRQ to ensure locking order of sched_engine->lock ->
3486 * ce->guc_state.lock is preserved.
3488 list_for_each_entry_safe(rq, rn, &ce->guc_state.fences,
3490 list_del(&rq->guc_fence_link);
3491 irq_work_queue(&rq->submit_work);
3494 INIT_LIST_HEAD(&ce->guc_state.fences);
3497 static void guc_signal_context_fence(struct intel_context *ce)
3499 unsigned long flags;
3501 GEM_BUG_ON(intel_context_is_child(ce));
3503 spin_lock_irqsave(&ce->guc_state.lock, flags);
3504 clr_context_wait_for_deregister_to_register(ce);
3505 __guc_signal_context_fence(ce);
3506 spin_unlock_irqrestore(&ce->guc_state.lock, flags);
3509 static bool context_needs_register(struct intel_context *ce, bool new_guc_id)
3511 return (new_guc_id || test_bit(CONTEXT_LRCA_DIRTY, &ce->flags) ||
3512 !ctx_id_mapped(ce_to_guc(ce), ce->guc_id.id)) &&
3513 !submission_disabled(ce_to_guc(ce));
3516 static void guc_context_init(struct intel_context *ce)
3518 const struct i915_gem_context *ctx;
3519 int prio = I915_CONTEXT_DEFAULT_PRIORITY;
3522 ctx = rcu_dereference(ce->gem_context);
3524 prio = ctx->sched.priority;
3527 ce->guc_state.prio = map_i915_prio_to_guc_prio(prio);
3529 INIT_DELAYED_WORK(&ce->guc_state.sched_disable_delay_work,
3530 __delay_sched_disable);
3532 set_bit(CONTEXT_GUC_INIT, &ce->flags);
3535 static int guc_request_alloc(struct i915_request *rq)
3537 struct intel_context *ce = request_to_scheduling_context(rq);
3538 struct intel_guc *guc = ce_to_guc(ce);
3539 unsigned long flags;
3542 GEM_BUG_ON(!intel_context_is_pinned(rq->context));
3545 * Flush enough space to reduce the likelihood of waiting after
3546 * we start building the request - in which case we will just
3547 * have to repeat work.
3549 rq->reserved_space += GUC_REQUEST_SIZE;
3552 * Note that after this point, we have committed to using
3553 * this request as it is being used to both track the
3554 * state of engine initialisation and liveness of the
3555 * golden renderstate above. Think twice before you try
3556 * to cancel/unwind this request now.
3559 /* Unconditionally invalidate GPU caches and TLBs. */
3560 ret = rq->engine->emit_flush(rq, EMIT_INVALIDATE);
3564 rq->reserved_space -= GUC_REQUEST_SIZE;
3566 if (unlikely(!test_bit(CONTEXT_GUC_INIT, &ce->flags)))
3567 guc_context_init(ce);
3570 * If the context gets closed while the execbuf is ongoing, the context
3571 * close code will race with the below code to cancel the delayed work.
3572 * If the context close wins the race and cancels the work, it will
3573 * immediately call the sched disable (see guc_context_close), so there
3574 * is a chance we can get past this check while the sched_disable code
3575 * is being executed. To make sure that code completes before we check
3576 * the status further down, we wait for the close process to complete.
3577 * Else, this code path could send a request down thinking that the
3578 * context is still in a schedule-enable mode while the GuC ends up
3579 * dropping the request completely because the disable did go from the
3580 * context_close path right to GuC just prior. In the event the CT is
3581 * full, we could potentially need to wait up to 1.5 seconds.
3583 if (cancel_delayed_work_sync(&ce->guc_state.sched_disable_delay_work))
3584 intel_context_sched_disable_unpin(ce);
3585 else if (intel_context_is_closed(ce))
3586 if (wait_for(context_close_done(ce), 1500))
3587 drm_warn(&guc_to_gt(guc)->i915->drm,
3588 "timed out waiting on context sched close before realloc\n");
3590 * Call pin_guc_id here rather than in the pinning step as with
3591 * dma_resv, contexts can be repeatedly pinned / unpinned thrashing the
3592 * guc_id and creating horrible race conditions. This is especially bad
3593 * when guc_id are being stolen due to over subscription. By the time
3594 * this function is reached, it is guaranteed that the guc_id will be
3595 * persistent until the generated request is retired, thus sealing these
3596 * race conditions. It is still safe to fail here if guc_ids are
3597 * exhausted and return -EAGAIN to the user indicating that they can try
3598 * again in the future.
3600 * There is no need for a lock here as the timeline mutex ensures at
3601 * most one context can be executing this code path at once. The
3602 * guc_id_ref is incremented once for every request in flight and
3603 * decremented on each retire. When it is zero, a lock around the
3604 * increment (in pin_guc_id) is needed to seal a race with unpin_guc_id.
3606 if (atomic_add_unless(&ce->guc_id.ref, 1, 0))
3609 ret = pin_guc_id(guc, ce); /* returns 1 if new guc_id assigned */
3610 if (unlikely(ret < 0))
3612 if (context_needs_register(ce, !!ret)) {
3613 ret = try_context_registration(ce, true);
3614 if (unlikely(ret)) { /* unwind */
3615 if (ret == -EPIPE) {
3616 disable_submission(guc);
3617 goto out; /* GPU will be reset */
3619 atomic_dec(&ce->guc_id.ref);
3620 unpin_guc_id(guc, ce);
3625 clear_bit(CONTEXT_LRCA_DIRTY, &ce->flags);
3629 * We block all requests on this context if a G2H is pending for a
3630 * schedule disable or context deregistration as the GuC will fail a
3631 * schedule enable or context registration if either G2H is pending
3632 * respectively. Once a G2H returns, the fence is released that is
3633 * blocking these requests (see guc_signal_context_fence).
3635 spin_lock_irqsave(&ce->guc_state.lock, flags);
3636 if (context_wait_for_deregister_to_register(ce) ||
3637 context_pending_disable(ce)) {
3638 init_irq_work(&rq->submit_work, submit_work_cb);
3639 i915_sw_fence_await(&rq->submit);
3641 list_add_tail(&rq->guc_fence_link, &ce->guc_state.fences);
3643 spin_unlock_irqrestore(&ce->guc_state.lock, flags);
3648 static int guc_virtual_context_pre_pin(struct intel_context *ce,
3649 struct i915_gem_ww_ctx *ww,
3652 struct intel_engine_cs *engine = guc_virtual_get_sibling(ce->engine, 0);
3654 return __guc_context_pre_pin(ce, engine, ww, vaddr);
3657 static int guc_virtual_context_pin(struct intel_context *ce, void *vaddr)
3659 struct intel_engine_cs *engine = guc_virtual_get_sibling(ce->engine, 0);
3660 int ret = __guc_context_pin(ce, engine, vaddr);
3661 intel_engine_mask_t tmp, mask = ce->engine->mask;
3664 for_each_engine_masked(engine, ce->engine->gt, mask, tmp)
3665 intel_engine_pm_get(engine);
3670 static void guc_virtual_context_unpin(struct intel_context *ce)
3672 intel_engine_mask_t tmp, mask = ce->engine->mask;
3673 struct intel_engine_cs *engine;
3674 struct intel_guc *guc = ce_to_guc(ce);
3676 GEM_BUG_ON(context_enabled(ce));
3677 GEM_BUG_ON(intel_context_is_barrier(ce));
3679 unpin_guc_id(guc, ce);
3682 for_each_engine_masked(engine, ce->engine->gt, mask, tmp)
3683 intel_engine_pm_put_async(engine);
3686 static void guc_virtual_context_enter(struct intel_context *ce)
3688 intel_engine_mask_t tmp, mask = ce->engine->mask;
3689 struct intel_engine_cs *engine;
3691 for_each_engine_masked(engine, ce->engine->gt, mask, tmp)
3692 intel_engine_pm_get(engine);
3694 intel_timeline_enter(ce->timeline);
3697 static void guc_virtual_context_exit(struct intel_context *ce)
3699 intel_engine_mask_t tmp, mask = ce->engine->mask;
3700 struct intel_engine_cs *engine;
3702 for_each_engine_masked(engine, ce->engine->gt, mask, tmp)
3703 intel_engine_pm_put(engine);
3705 intel_timeline_exit(ce->timeline);
3708 static int guc_virtual_context_alloc(struct intel_context *ce)
3710 struct intel_engine_cs *engine = guc_virtual_get_sibling(ce->engine, 0);
3712 return lrc_alloc(ce, engine);
3715 static const struct intel_context_ops virtual_guc_context_ops = {
3716 .alloc = guc_virtual_context_alloc,
3718 .close = guc_context_close,
3720 .pre_pin = guc_virtual_context_pre_pin,
3721 .pin = guc_virtual_context_pin,
3722 .unpin = guc_virtual_context_unpin,
3723 .post_unpin = guc_context_post_unpin,
3725 .revoke = guc_context_revoke,
3727 .cancel_request = guc_context_cancel_request,
3729 .enter = guc_virtual_context_enter,
3730 .exit = guc_virtual_context_exit,
3732 .sched_disable = guc_context_sched_disable,
3734 .destroy = guc_context_destroy,
3736 .get_sibling = guc_virtual_get_sibling,
3739 static int guc_parent_context_pin(struct intel_context *ce, void *vaddr)
3741 struct intel_engine_cs *engine = guc_virtual_get_sibling(ce->engine, 0);
3742 struct intel_guc *guc = ce_to_guc(ce);
3745 GEM_BUG_ON(!intel_context_is_parent(ce));
3746 GEM_BUG_ON(!intel_engine_is_virtual(ce->engine));
3748 ret = pin_guc_id(guc, ce);
3749 if (unlikely(ret < 0))
3752 return __guc_context_pin(ce, engine, vaddr);
3755 static int guc_child_context_pin(struct intel_context *ce, void *vaddr)
3757 struct intel_engine_cs *engine = guc_virtual_get_sibling(ce->engine, 0);
3759 GEM_BUG_ON(!intel_context_is_child(ce));
3760 GEM_BUG_ON(!intel_engine_is_virtual(ce->engine));
3762 __intel_context_pin(ce->parallel.parent);
3763 return __guc_context_pin(ce, engine, vaddr);
3766 static void guc_parent_context_unpin(struct intel_context *ce)
3768 struct intel_guc *guc = ce_to_guc(ce);
3770 GEM_BUG_ON(context_enabled(ce));
3771 GEM_BUG_ON(intel_context_is_barrier(ce));
3772 GEM_BUG_ON(!intel_context_is_parent(ce));
3773 GEM_BUG_ON(!intel_engine_is_virtual(ce->engine));
3775 unpin_guc_id(guc, ce);
3779 static void guc_child_context_unpin(struct intel_context *ce)
3781 GEM_BUG_ON(context_enabled(ce));
3782 GEM_BUG_ON(intel_context_is_barrier(ce));
3783 GEM_BUG_ON(!intel_context_is_child(ce));
3784 GEM_BUG_ON(!intel_engine_is_virtual(ce->engine));
3789 static void guc_child_context_post_unpin(struct intel_context *ce)
3791 GEM_BUG_ON(!intel_context_is_child(ce));
3792 GEM_BUG_ON(!intel_context_is_pinned(ce->parallel.parent));
3793 GEM_BUG_ON(!intel_engine_is_virtual(ce->engine));
3796 intel_context_unpin(ce->parallel.parent);
3799 static void guc_child_context_destroy(struct kref *kref)
3801 struct intel_context *ce = container_of(kref, typeof(*ce), ref);
3803 __guc_context_destroy(ce);
3806 static const struct intel_context_ops virtual_parent_context_ops = {
3807 .alloc = guc_virtual_context_alloc,
3809 .close = guc_context_close,
3811 .pre_pin = guc_context_pre_pin,
3812 .pin = guc_parent_context_pin,
3813 .unpin = guc_parent_context_unpin,
3814 .post_unpin = guc_context_post_unpin,
3816 .revoke = guc_context_revoke,
3818 .cancel_request = guc_context_cancel_request,
3820 .enter = guc_virtual_context_enter,
3821 .exit = guc_virtual_context_exit,
3823 .sched_disable = guc_context_sched_disable,
3825 .destroy = guc_context_destroy,
3827 .get_sibling = guc_virtual_get_sibling,
3830 static const struct intel_context_ops virtual_child_context_ops = {
3831 .alloc = guc_virtual_context_alloc,
3833 .pre_pin = guc_context_pre_pin,
3834 .pin = guc_child_context_pin,
3835 .unpin = guc_child_context_unpin,
3836 .post_unpin = guc_child_context_post_unpin,
3838 .cancel_request = guc_context_cancel_request,
3840 .enter = guc_virtual_context_enter,
3841 .exit = guc_virtual_context_exit,
3843 .destroy = guc_child_context_destroy,
3845 .get_sibling = guc_virtual_get_sibling,
3849 * The below override of the breadcrumbs is enabled when the user configures a
3850 * context for parallel submission (multi-lrc, parent-child).
3852 * The overridden breadcrumbs implements an algorithm which allows the GuC to
3853 * safely preempt all the hw contexts configured for parallel submission
3854 * between each BB. The contract between the i915 and GuC is that if the parent
3855 * context can be preempted, all the children can be preempted, and the GuC will
3856 * always try to preempt the parent before the children. A handshake between the
3857 * parent / children breadcrumbs ensures the i915 holds up its end of the deal
3858 * creating a window to preempt between each set of BBs.
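 *
 * A rough sketch of that handshake (see the emit_* helpers further down):
 * each child writes a join semaphore in the parent's scratch page and then
 * waits on a shared go semaphore, while the parent waits for all children
 * to join before writing the go value that releases them. Running this
 * handshake both at batch start and in the fini breadcrumbs is what opens
 * the window in which the GuC can preempt the whole parallel set between
 * sets of BBs.
 */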
3860 static int emit_bb_start_parent_no_preempt_mid_batch(struct i915_request *rq,
3861 u64 offset, u32 len,
3862 const unsigned int flags);
3863 static int emit_bb_start_child_no_preempt_mid_batch(struct i915_request *rq,
3864 u64 offset, u32 len,
3865 const unsigned int flags);
3867 emit_fini_breadcrumb_parent_no_preempt_mid_batch(struct i915_request *rq,
3870 emit_fini_breadcrumb_child_no_preempt_mid_batch(struct i915_request *rq,
3873 static struct intel_context *
3874 guc_create_parallel(struct intel_engine_cs **engines,
3875 unsigned int num_siblings,
3878 struct intel_engine_cs **siblings = NULL;
3879 struct intel_context *parent = NULL, *ce, *err;
3882 siblings = kmalloc_array(num_siblings,
3886 return ERR_PTR(-ENOMEM);
3888 for (i = 0; i < width; ++i) {
3889 for (j = 0; j < num_siblings; ++j)
3890 siblings[j] = engines[i * num_siblings + j];
3892 ce = intel_engine_create_virtual(siblings, num_siblings,
3901 parent->ops = &virtual_parent_context_ops;
3903 ce->ops = &virtual_child_context_ops;
3904 intel_context_bind_parent_child(parent, ce);
3908 parent->parallel.fence_context = dma_fence_context_alloc(1);
3910 parent->engine->emit_bb_start =
3911 emit_bb_start_parent_no_preempt_mid_batch;
3912 parent->engine->emit_fini_breadcrumb =
3913 emit_fini_breadcrumb_parent_no_preempt_mid_batch;
3914 parent->engine->emit_fini_breadcrumb_dw =
3915 12 + 4 * parent->parallel.number_children;
3916 for_each_child(parent, ce) {
3917 ce->engine->emit_bb_start =
3918 emit_bb_start_child_no_preempt_mid_batch;
3919 ce->engine->emit_fini_breadcrumb =
3920 emit_fini_breadcrumb_child_no_preempt_mid_batch;
3921 ce->engine->emit_fini_breadcrumb_dw = 16;
3929 intel_context_put(parent);
3935 guc_irq_enable_breadcrumbs(struct intel_breadcrumbs *b)
3937 struct intel_engine_cs *sibling;
3938 intel_engine_mask_t tmp, mask = b->engine_mask;
3939 bool result = false;
3941 for_each_engine_masked(sibling, b->irq_engine->gt, mask, tmp)
3942 result |= intel_engine_irq_enable(sibling);
3948 guc_irq_disable_breadcrumbs(struct intel_breadcrumbs *b)
3950 struct intel_engine_cs *sibling;
3951 intel_engine_mask_t tmp, mask = b->engine_mask;
3953 for_each_engine_masked(sibling, b->irq_engine->gt, mask, tmp)
3954 intel_engine_irq_disable(sibling);
3957 static void guc_init_breadcrumbs(struct intel_engine_cs *engine)
3962 * In GuC submission mode we do not know which physical engine a request
3963 * will be scheduled on; this creates a problem because the breadcrumb
3964 * interrupt is per physical engine. To work around this we attach
3965 * requests and direct all breadcrumb interrupts to the first instance
3966 * of an engine per class. In addition all breadcrumb interrupts are
3967 * enabled / disabled across an engine class in unison.
3969 for (i = 0; i < MAX_ENGINE_INSTANCE; ++i) {
3970 struct intel_engine_cs *sibling =
3971 engine->gt->engine_class[engine->class][i];
3974 if (engine->breadcrumbs != sibling->breadcrumbs) {
3975 intel_breadcrumbs_put(engine->breadcrumbs);
3976 engine->breadcrumbs =
3977 intel_breadcrumbs_get(sibling->breadcrumbs);
3983 if (engine->breadcrumbs) {
3984 engine->breadcrumbs->engine_mask |= engine->mask;
3985 engine->breadcrumbs->irq_enable = guc_irq_enable_breadcrumbs;
3986 engine->breadcrumbs->irq_disable = guc_irq_disable_breadcrumbs;
3990 static void guc_bump_inflight_request_prio(struct i915_request *rq,
3993 struct intel_context *ce = request_to_scheduling_context(rq);
3994 u8 new_guc_prio = map_i915_prio_to_guc_prio(prio);
3996 /* Short circuit function */
3997 if (prio < I915_PRIORITY_NORMAL ||
3998 rq->guc_prio == GUC_PRIO_FINI ||
3999 (rq->guc_prio != GUC_PRIO_INIT &&
4000 !new_guc_prio_higher(rq->guc_prio, new_guc_prio)))
4003 spin_lock(&ce->guc_state.lock);
4004 if (rq->guc_prio != GUC_PRIO_FINI) {
4005 if (rq->guc_prio != GUC_PRIO_INIT)
4006 sub_context_inflight_prio(ce, rq->guc_prio);
4007 rq->guc_prio = new_guc_prio;
4008 add_context_inflight_prio(ce, rq->guc_prio);
4009 update_context_prio(ce);
4011 spin_unlock(&ce->guc_state.lock);
4014 static void guc_retire_inflight_request_prio(struct i915_request *rq)
4016 struct intel_context *ce = request_to_scheduling_context(rq);
4018 spin_lock(&ce->guc_state.lock);
4019 guc_prio_fini(rq, ce);
4020 spin_unlock(&ce->guc_state.lock);
4023 static void sanitize_hwsp(struct intel_engine_cs *engine)
4025 struct intel_timeline *tl;
4027 list_for_each_entry(tl, &engine->status_page.timelines, engine_link)
4028 intel_timeline_reset_seqno(tl);
4031 static void guc_sanitize(struct intel_engine_cs *engine)
4034 * Poison residual state on resume, in case the suspend didn't!
4036 * We have to assume that across suspend/resume (or other loss
4037 * of control) that the contents of our pinned buffers have been
4038 * lost, replaced by garbage. Since this doesn't always happen,
4039 * let's poison such state so that we more quickly spot when
4040 * we falsely assume it has been preserved.
4042 if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
4043 memset(engine->status_page.addr, POISON_INUSE, PAGE_SIZE);
4046 * The kernel_context HWSP is stored in the status_page. As above,
4047 * that may be lost on resume/initialisation, and so we need to
4048 * reset the value in the HWSP.
4050 sanitize_hwsp(engine);
4052 /* And scrub the dirty cachelines for the HWSP */
4053 drm_clflush_virt_range(engine->status_page.addr, PAGE_SIZE);
4055 intel_engine_reset_pinned_contexts(engine);
4058 static void setup_hwsp(struct intel_engine_cs *engine)
4060 intel_engine_set_hwsp_writemask(engine, ~0u); /* HWSTAM */
4062 ENGINE_WRITE_FW(engine,
4064 i915_ggtt_offset(engine->status_page.vma));
4067 static void start_engine(struct intel_engine_cs *engine)
4069 ENGINE_WRITE_FW(engine,
4071 _MASKED_BIT_ENABLE(GEN11_GFX_DISABLE_LEGACY_MODE));
4073 ENGINE_WRITE_FW(engine, RING_MI_MODE, _MASKED_BIT_DISABLE(STOP_RING));
4074 ENGINE_POSTING_READ(engine, RING_MI_MODE);
4077 static int guc_resume(struct intel_engine_cs *engine)
4079 assert_forcewakes_active(engine->uncore, FORCEWAKE_ALL);
4081 intel_mocs_init_engine(engine);
4083 intel_breadcrumbs_reset(engine->breadcrumbs);
4086 start_engine(engine);
4088 if (engine->flags & I915_ENGINE_FIRST_RENDER_COMPUTE)
4089 xehp_enable_ccs_engines(engine);
4094 static bool guc_sched_engine_disabled(struct i915_sched_engine *sched_engine)
4096 return !sched_engine->tasklet.callback;
4099 static void guc_set_default_submission(struct intel_engine_cs *engine)
4101 engine->submit_request = guc_submit_request;
4104 static inline void guc_kernel_context_pin(struct intel_guc *guc,
4105 struct intel_context *ce)
4108 * Note: we purposefully do not check the returns below because
4109 * the registration can only fail if a reset is just starting.
4110 * This is called at the end of reset so presumably another reset
4111 * isn't happening and even if it did, this code would be run again.
4114 if (context_guc_id_invalid(ce))
4115 pin_guc_id(guc, ce);
4117 if (!test_bit(CONTEXT_GUC_INIT, &ce->flags))
4118 guc_context_init(ce);
4120 try_context_registration(ce, true);
4123 static inline void guc_init_lrc_mapping(struct intel_guc *guc)
4125 struct intel_gt *gt = guc_to_gt(guc);
4126 struct intel_engine_cs *engine;
4127 enum intel_engine_id id;
4129 /* make sure all descriptors are clean... */
4130 xa_destroy(&guc->context_lookup);
4133 * A reset might have occurred while we had a pending stalled request,
4134 * so make sure we clean that up.
4136 guc->stalled_request = NULL;
4137 guc->submission_stall_reason = STALL_NONE;
4140 * Some contexts might have been pinned before we enabled GuC
4141 * submission, so we need to add them to the GuC bookkeeping.
4142 * Also, after a reset of the GuC we want to make sure that the
4143 * information shared with GuC is properly reset. The kernel LRCs are
4144 * not attached to the gem_context, so they need to be added separately.
4146 for_each_engine(engine, gt, id) {
4147 struct intel_context *ce;
4149 list_for_each_entry(ce, &engine->pinned_contexts_list,
4150 pinned_contexts_link)
4151 guc_kernel_context_pin(guc, ce);
4155 static void guc_release(struct intel_engine_cs *engine)
4157 engine->sanitize = NULL; /* no longer in control, nothing to sanitize */
4159 intel_engine_cleanup_common(engine);
4160 lrc_fini_wa_ctx(engine);
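/*
 * A virtual engine has no serial of its own; bump the serial of every
 * physical sibling it can run on instead.
 */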
4163 static void virtual_guc_bump_serial(struct intel_engine_cs *engine)
4165 struct intel_engine_cs *e;
4166 intel_engine_mask_t tmp, mask = engine->mask;
4168 for_each_engine_masked(e, engine->gt, mask, tmp)
e->serial++;
4172 static void guc_default_vfuncs(struct intel_engine_cs *engine)
4174 /* Default vfuncs which can be overridden by each engine. */
4176 engine->resume = guc_resume;
4178 engine->cops = &guc_context_ops;
4179 engine->request_alloc = guc_request_alloc;
4180 engine->add_active_request = add_to_context;
4181 engine->remove_active_request = remove_from_context;
4183 engine->sched_engine->schedule = i915_schedule;
4185 engine->reset.prepare = guc_engine_reset_prepare;
4186 engine->reset.rewind = guc_rewind_nop;
4187 engine->reset.cancel = guc_reset_nop;
4188 engine->reset.finish = guc_reset_nop;
4190 engine->emit_flush = gen8_emit_flush_xcs;
4191 engine->emit_init_breadcrumb = gen8_emit_init_breadcrumb;
4192 engine->emit_fini_breadcrumb = gen8_emit_fini_breadcrumb_xcs;
4193 if (GRAPHICS_VER(engine->i915) >= 12) {
4194 engine->emit_fini_breadcrumb = gen12_emit_fini_breadcrumb_xcs;
4195 engine->emit_flush = gen12_emit_flush_xcs;
4197 engine->set_default_submission = guc_set_default_submission;
4198 engine->busyness = guc_engine_busyness;
4200 engine->flags |= I915_ENGINE_SUPPORTS_STATS;
4201 engine->flags |= I915_ENGINE_HAS_PREEMPTION;
4202 engine->flags |= I915_ENGINE_HAS_TIMESLICES;
4204 /* Wa_14014475959:dg2 */
4205 if (IS_DG2(engine->i915) && engine->class == COMPUTE_CLASS)
4206 engine->flags |= I915_ENGINE_USES_WA_HOLD_CCS_SWITCHOUT;
4209 * TODO: GuC supports timeslicing and semaphores as well, but they're
4210 * handled by the firmware, so some minor tweaks are required before enabling them:
4213 * engine->flags |= I915_ENGINE_HAS_SEMAPHORES;
4216 engine->emit_bb_start = gen8_emit_bb_start;
4217 if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 50))
4218 engine->emit_bb_start = xehp_emit_bb_start;
4221 static void rcs_submission_override(struct intel_engine_cs *engine)
4223 switch (GRAPHICS_VER(engine->i915)) {
4225 engine->emit_flush = gen12_emit_flush_rcs;
4226 engine->emit_fini_breadcrumb = gen12_emit_fini_breadcrumb_rcs;
4229 engine->emit_flush = gen11_emit_flush_rcs;
4230 engine->emit_fini_breadcrumb = gen11_emit_fini_breadcrumb_rcs;
4233 engine->emit_flush = gen8_emit_flush_rcs;
4234 engine->emit_fini_breadcrumb = gen8_emit_fini_breadcrumb_rcs;
4239 static inline void guc_default_irqs(struct intel_engine_cs *engine)
4241 engine->irq_keep_mask = GT_RENDER_USER_INTERRUPT;
4242 intel_engine_set_irq_handler(engine, cs_irq_handler);
4245 static void guc_sched_engine_destroy(struct kref *kref)
4247 struct i915_sched_engine *sched_engine =
4248 container_of(kref, typeof(*sched_engine), ref);
4249 struct intel_guc *guc = sched_engine->private_data;
4251 guc->sched_engine = NULL;
4252 tasklet_kill(&sched_engine->tasklet); /* flush the callback */
4253 kfree(sched_engine);
4256 int intel_guc_submission_setup(struct intel_engine_cs *engine)
4258 struct drm_i915_private *i915 = engine->i915;
4259 struct intel_guc *guc = &engine->gt->uc.guc;
4262 * The setup relies on several assumptions (e.g. irqs always enabled)
4263 * that are only valid on gen11+
4265 GEM_BUG_ON(GRAPHICS_VER(i915) < 11);
4267 if (!guc->sched_engine) {
4268 guc->sched_engine = i915_sched_engine_create(ENGINE_VIRTUAL);
4269 if (!guc->sched_engine)
4272 guc->sched_engine->schedule = i915_schedule;
4273 guc->sched_engine->disabled = guc_sched_engine_disabled;
4274 guc->sched_engine->private_data = guc;
4275 guc->sched_engine->destroy = guc_sched_engine_destroy;
4276 guc->sched_engine->bump_inflight_request_prio =
4277 guc_bump_inflight_request_prio;
4278 guc->sched_engine->retire_inflight_request_prio =
4279 guc_retire_inflight_request_prio;
4280 tasklet_setup(&guc->sched_engine->tasklet,
4281 guc_submission_tasklet);
4283 i915_sched_engine_put(engine->sched_engine);
4284 engine->sched_engine = i915_sched_engine_get(guc->sched_engine);
4286 guc_default_vfuncs(engine);
4287 guc_default_irqs(engine);
4288 guc_init_breadcrumbs(engine);
4290 if (engine->flags & I915_ENGINE_HAS_RCS_REG_STATE)
4291 rcs_submission_override(engine);
4293 lrc_init_wa_ctx(engine);
4295 /* Finally, take ownership and responsibility for cleanup! */
4296 engine->sanitize = guc_sanitize;
4297 engine->release = guc_release;
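/*
 * Global scheduling policy updates are sent to the GuC as a single H2G
 * carrying a list of KLV (key/length/value) tuples. The helpers below build
 * that message: each KLV is one header dword encoding the key and value
 * length, followed by the value dwords.
 */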
4302 struct scheduling_policy {
4304 u32 max_words, num_words;
4307 struct guc_update_scheduling_policy h2g;
4310 static u32 __guc_scheduling_policy_action_size(struct scheduling_policy *policy)
4312 u32 *start = (void *)&policy->h2g;
4313 u32 *end = policy->h2g.data + policy->num_words;
4314 size_t delta = end - start;
4319 static struct scheduling_policy *__guc_scheduling_policy_start_klv(struct scheduling_policy *policy)
4321 policy->h2g.header.action = INTEL_GUC_ACTION_UPDATE_SCHEDULING_POLICIES_KLV;
4322 policy->max_words = ARRAY_SIZE(policy->h2g.data);
4323 policy->num_words = 0;
4329 static void __guc_scheduling_policy_add_klv(struct scheduling_policy *policy,
4330 u32 action, u32 *data, u32 len)
4332 u32 *klv_ptr = policy->h2g.data + policy->num_words;
4334 GEM_BUG_ON((policy->num_words + 1 + len) > policy->max_words);
4335 *(klv_ptr++) = FIELD_PREP(GUC_KLV_0_KEY, action) |
4336 FIELD_PREP(GUC_KLV_0_LEN, len);
4337 memcpy(klv_ptr, data, sizeof(u32) * len);
4338 policy->num_words += 1 + len;
4342 static int __guc_action_set_scheduling_policies(struct intel_guc *guc,
4343 struct scheduling_policy *policy)
4347 ret = intel_guc_send(guc, (u32 *)&policy->h2g,
4348 __guc_scheduling_policy_action_size(policy));
4352 if (ret != policy->count) {
4353 drm_warn(&guc_to_gt(guc)->i915->drm, "GuC global scheduler policy processed %d of %d KLVs!",
4354 ret, policy->count);
4355 if (ret > policy->count)
4362 static int guc_init_global_schedule_policy(struct intel_guc *guc)
4364 struct scheduling_policy policy;
4365 struct intel_gt *gt = guc_to_gt(guc);
4366 intel_wakeref_t wakeref;
4369 if (GET_UC_VER(guc) < MAKE_UC_VER(70, 3, 0))
4372 __guc_scheduling_policy_start_klv(&policy);
4374 with_intel_runtime_pm(&gt->i915->runtime_pm, wakeref) {
4376 GLOBAL_SCHEDULE_POLICY_RC_YIELD_DURATION,
4377 GLOBAL_SCHEDULE_POLICY_RC_YIELD_RATIO,
4380 __guc_scheduling_policy_add_klv(&policy,
4381 GUC_SCHEDULING_POLICIES_KLV_ID_RENDER_COMPUTE_YIELD,
4382 yield, ARRAY_SIZE(yield));
4384 ret = __guc_action_set_scheduling_policies(guc, &policy);
4386 i915_probe_error(gt->i915,
4387 "Failed to configure global scheduling policies: %pe!\n",
4394 void intel_guc_submission_enable(struct intel_guc *guc)
4396 struct intel_gt *gt = guc_to_gt(guc);
4398 /* Enable and route to GuC */
4399 if (GRAPHICS_VER(gt->i915) >= 12)
4400 intel_uncore_write(gt->uncore, GEN12_GUC_SEM_INTR_ENABLES,
4401 GUC_SEM_INTR_ROUTE_TO_GUC |
4402 GUC_SEM_INTR_ENABLE_ALL);
4404 guc_init_lrc_mapping(guc);
4405 guc_init_engine_stats(guc);
4406 guc_init_global_schedule_policy(guc);
4409 void intel_guc_submission_disable(struct intel_guc *guc)
4411 struct intel_gt *gt = guc_to_gt(guc);
4413 /* Note: By the time we're here, GuC may have already been reset */
4415 /* Disable and route to host */
4416 if (GRAPHICS_VER(gt->i915) >= 12)
4417 intel_uncore_write(gt->uncore, GEN12_GUC_SEM_INTR_ENABLES, 0x0);
4420 static bool __guc_submission_supported(struct intel_guc *guc)
4422 /* GuC submission is unavailable for pre-Gen11 */
4423 return intel_guc_is_supported(guc) &&
4424 GRAPHICS_VER(guc_to_gt(guc)->i915) >= 11;
4427 static bool __guc_submission_selected(struct intel_guc *guc)
4429 struct drm_i915_private *i915 = guc_to_gt(guc)->i915;
4431 if (!intel_guc_submission_is_supported(guc))
4434 return i915->params.enable_guc & ENABLE_GUC_SUBMISSION;
4437 int intel_guc_sched_disable_gucid_threshold_max(struct intel_guc *guc)
4439 return guc->submission_state.num_guc_ids - NUMBER_MULTI_LRC_GUC_ID(guc);
4443 * This default value of 33 milliseconds (+1 millisecond to round up; one frame at 30 fps) ensures
4444 * that 30 fps or higher workloads are able to enjoy the latency reduction when delaying the schedule-disable
4445 * operation. This matches the 30fps game-render + encode (real world) workload this
4446 * knob was tested against.
4448 #define SCHED_DISABLE_DELAY_MS 34
4451 * A threshold of 75% is a reasonable starting point considering that real world apps
4452 * generally don't get anywhere near this.
4454 #define NUM_SCHED_DISABLE_GUCIDS_DEFAULT_THRESHOLD(__guc) \
4455 (((intel_guc_sched_disable_gucid_threshold_max(__guc)) * 3) / 4)
4457 void intel_guc_submission_init_early(struct intel_guc *guc)
4459 xa_init_flags(&guc->context_lookup, XA_FLAGS_LOCK_IRQ);
4461 spin_lock_init(&guc->submission_state.lock);
4462 INIT_LIST_HEAD(&guc->submission_state.guc_id_list);
4463 ida_init(&guc->submission_state.guc_ids);
4464 INIT_LIST_HEAD(&guc->submission_state.destroyed_contexts);
4465 INIT_WORK(&guc->submission_state.destroyed_worker,
4466 destroyed_worker_func);
4467 INIT_WORK(&guc->submission_state.reset_fail_worker,
4468 reset_fail_worker_func);
4470 spin_lock_init(&guc->timestamp.lock);
4471 INIT_DELAYED_WORK(&guc->timestamp.work, guc_timestamp_ping);
4473 guc->submission_state.sched_disable_delay_ms = SCHED_DISABLE_DELAY_MS;
4474 guc->submission_state.num_guc_ids = GUC_MAX_CONTEXT_ID;
4475 guc->submission_state.sched_disable_gucid_threshold =
4476 NUM_SCHED_DISABLE_GUCIDS_DEFAULT_THRESHOLD(guc);
4477 guc->submission_supported = __guc_submission_supported(guc);
4478 guc->submission_selected = __guc_submission_selected(guc);
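/*
 * Map a context id received in a G2H message back to an intel_context,
 * rejecting out-of-range ids, ids with no registered context and child
 * contexts (the GuC only addresses the parent of a parallel set).
 */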
4481 static inline struct intel_context *
4482 g2h_context_lookup(struct intel_guc *guc, u32 ctx_id)
4484 struct intel_context *ce;
4486 if (unlikely(ctx_id >= GUC_MAX_CONTEXT_ID)) {
4487 drm_err(&guc_to_gt(guc)->i915->drm,
4488 "Invalid ctx_id %u\n", ctx_id);
4492 ce = __get_context(guc, ctx_id);
4493 if (unlikely(!ce)) {
4494 drm_err(&guc_to_gt(guc)->i915->drm,
4495 "Context is NULL, ctx_id %u\n", ctx_id);
4499 if (unlikely(intel_context_is_child(ce))) {
4500 drm_err(&guc_to_gt(guc)->i915->drm,
4501 "Context is child, ctx_id %u\n", ctx_id);
4508 int intel_guc_deregister_done_process_msg(struct intel_guc *guc,
4512 struct intel_context *ce;
4515 if (unlikely(len < 1)) {
4516 drm_err(&guc_to_gt(guc)->i915->drm, "Invalid length %u\n", len);
4521 ce = g2h_context_lookup(guc, ctx_id);
4525 trace_intel_context_deregister_done(ce);
4527 #ifdef CONFIG_DRM_I915_SELFTEST
4528 if (unlikely(ce->drop_deregister)) {
4529 ce->drop_deregister = false;
4534 if (context_wait_for_deregister_to_register(ce)) {
4535 struct intel_runtime_pm *runtime_pm =
4536 &ce->engine->gt->i915->runtime_pm;
4537 intel_wakeref_t wakeref;
4540 * Previous owner of this guc_id has been deregistered, now safe
4541 * to register this context.
4543 with_intel_runtime_pm(runtime_pm, wakeref)
4544 register_context(ce, true);
4545 guc_signal_context_fence(ce);
4546 intel_context_put(ce);
4547 } else if (context_destroyed(ce)) {
4548 /* Context has been destroyed */
4549 intel_gt_pm_put_async(guc_to_gt(guc));
4550 release_guc_id(guc, ce);
4551 __guc_context_destroy(ce);
4554 decr_outstanding_submission_g2h(guc);
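/*
 * G2H handler: the GuC has finished processing a schedule enable or disable
 * request for a context. Completing a disable lets us unpin the context and
 * release the fence blocking new requests; a banned context additionally has
 * its remaining requests cancelled.
 */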
4559 int intel_guc_sched_done_process_msg(struct intel_guc *guc,
4563 struct intel_context *ce;
4564 unsigned long flags;
4567 if (unlikely(len < 2)) {
4568 drm_err(&guc_to_gt(guc)->i915->drm, "Invalid length %u\n", len);
4573 ce = g2h_context_lookup(guc, ctx_id);
4577 if (unlikely(context_destroyed(ce) ||
4578 (!context_pending_enable(ce) &&
4579 !context_pending_disable(ce)))) {
4580 drm_err(&guc_to_gt(guc)->i915->drm,
4581 "Bad context sched_state 0x%x, ctx_id %u\n",
4582 ce->guc_state.sched_state, ctx_id);
4586 trace_intel_context_sched_done(ce);
4588 if (context_pending_enable(ce)) {
4589 #ifdef CONFIG_DRM_I915_SELFTEST
4590 if (unlikely(ce->drop_schedule_enable)) {
4591 ce->drop_schedule_enable = false;
4596 spin_lock_irqsave(&ce->guc_state.lock, flags);
4597 clr_context_pending_enable(ce);
4598 spin_unlock_irqrestore(&ce->guc_state.lock, flags);
4599 } else if (context_pending_disable(ce)) {
4602 #ifdef CONFIG_DRM_I915_SELFTEST
4603 if (unlikely(ce->drop_schedule_disable)) {
4604 ce->drop_schedule_disable = false;
4610 * Unpin must be done before __guc_signal_context_fence,
4611 * otherwise a race exists between the requests getting
4612 * submitted + retired before this unpin completes resulting in
4613 * the pin_count going to zero and the context still being enabled.
4616 intel_context_sched_disable_unpin(ce);
4618 spin_lock_irqsave(&ce->guc_state.lock, flags);
4619 banned = context_banned(ce);
4620 clr_context_banned(ce);
4621 clr_context_pending_disable(ce);
4622 __guc_signal_context_fence(ce);
4623 guc_blocked_fence_complete(ce);
4624 spin_unlock_irqrestore(&ce->guc_state.lock, flags);
4627 guc_cancel_context_requests(ce);
4628 intel_engine_signal_breadcrumbs(ce->engine);
4632 decr_outstanding_submission_g2h(guc);
4633 intel_context_put(ce);
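/*
 * Capture an error state for the context that the GuC reported as hung,
 * before its requests are replayed, and account the engine reset against
 * the owning uabi class.
 */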
4638 static void capture_error_state(struct intel_guc *guc,
4639 struct intel_context *ce)
4641 struct intel_gt *gt = guc_to_gt(guc);
4642 struct drm_i915_private *i915 = gt->i915;
4643 struct intel_engine_cs *engine = __context_to_physical_engine(ce);
4644 intel_wakeref_t wakeref;
4646 intel_engine_set_hung_context(engine, ce);
4647 with_intel_runtime_pm(&i915->runtime_pm, wakeref)
4648 i915_capture_error_state(gt, engine->mask, CORE_DUMP_FLAG_IS_GUC_CAPTURE);
4649 atomic_inc(&i915->gpu_error.reset_engine_count[engine->uabi_class]);
4652 static void guc_context_replay(struct intel_context *ce)
4654 struct i915_sched_engine *sched_engine = ce->engine->sched_engine;
4656 __guc_reset_context(ce, ce->engine->mask);
4657 tasklet_hi_schedule(&sched_engine->tasklet);
4660 static void guc_handle_context_reset(struct intel_guc *guc,
4661 struct intel_context *ce)
4663 trace_intel_context_reset(ce);
4665 if (likely(intel_context_is_schedulable(ce))) {
4666 capture_error_state(guc, ce);
4667 guc_context_replay(ce);
4669 drm_info(&guc_to_gt(guc)->i915->drm,
4670 "Ignoring context reset notification of exiting context 0x%04X on %s",
4671 ce->guc_id.id, ce->engine->name);
4675 int intel_guc_context_reset_process_msg(struct intel_guc *guc,
4676 const u32 *msg, u32 len)
4678 struct intel_context *ce;
4679 unsigned long flags;
4682 if (unlikely(len != 1)) {
4683 drm_err(&guc_to_gt(guc)->i915->drm, "Invalid length %u", len);
4690 * The context lookup uses the xarray, but lookups only require the RCU read
4691 * lock, not the full spinlock. So take the lock explicitly and keep it until the
4692 * context has been reference count locked to ensure it can't be destroyed
4693 * asynchronously until the reset is done.
4695 xa_lock_irqsave(&guc->context_lookup, flags);
4696 ce = g2h_context_lookup(guc, ctx_id);
4698 intel_context_get(ce);
4699 xa_unlock_irqrestore(&guc->context_lookup, flags);
4704 guc_handle_context_reset(guc, ce);
4705 intel_context_put(ce);
4710 int intel_guc_error_capture_process_msg(struct intel_guc *guc,
4711 const u32 *msg, u32 len)
4715 if (unlikely(len != 1)) {
4716 drm_dbg(&guc_to_gt(guc)->i915->drm, "Invalid length %u", len);
4720 status = msg[0] & INTEL_GUC_STATE_CAPTURE_EVENT_STATUS_MASK;
4721 if (status == INTEL_GUC_STATE_CAPTURE_EVENT_STATUS_NOSPACE)
4722 drm_warn(&guc_to_gt(guc)->i915->drm, "G2H-Error capture no space");
4724 intel_guc_capture_process(guc);
4729 struct intel_engine_cs *
4730 intel_guc_lookup_engine(struct intel_guc *guc, u8 guc_class, u8 instance)
4732 struct intel_gt *gt = guc_to_gt(guc);
4733 u8 engine_class = guc_class_to_engine_class(guc_class);
4735 /* Class index is checked in class converter */
4736 GEM_BUG_ON(instance > MAX_ENGINE_INSTANCE);
4738 return gt->engine_class[engine_class][instance];
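/*
 * Worker that escalates engine-reset failures reported by the GuC into a
 * full GT reset, run outside the G2H handler so the reset can flush that
 * handler safely.
 */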
4741 static void reset_fail_worker_func(struct work_struct *w)
4743 struct intel_guc *guc = container_of(w, struct intel_guc,
4744 submission_state.reset_fail_worker);
4745 struct intel_gt *gt = guc_to_gt(guc);
4746 intel_engine_mask_t reset_fail_mask;
4747 unsigned long flags;
4749 spin_lock_irqsave(&guc->submission_state.lock, flags);
4750 reset_fail_mask = guc->submission_state.reset_fail_mask;
4751 guc->submission_state.reset_fail_mask = 0;
4752 spin_unlock_irqrestore(&guc->submission_state.lock, flags);
4754 if (likely(reset_fail_mask))
4755 intel_gt_handle_error(gt, reset_fail_mask,
4757 "GuC failed to reset engine mask=0x%x\n",
4761 int intel_guc_engine_failure_process_msg(struct intel_guc *guc,
4762 const u32 *msg, u32 len)
4764 struct intel_engine_cs *engine;
4765 struct intel_gt *gt = guc_to_gt(guc);
4766 u8 guc_class, instance;
4768 unsigned long flags;
4770 if (unlikely(len != 3)) {
4771 drm_err(&gt->i915->drm, "Invalid length %u", len);
4779 engine = intel_guc_lookup_engine(guc, guc_class, instance);
4780 if (unlikely(!engine)) {
4781 drm_err(&gt->i915->drm,
4782 "Invalid engine %d:%d", guc_class, instance);
4787 * This is an unexpected failure of a hardware feature. So, log a real
4788 * error message, not just the informational one that comes with the reset.
4790 drm_err(&gt->i915->drm, "GuC engine reset request failed on %d:%d (%s) because 0x%08X",
4791 guc_class, instance, engine->name, reason);
4793 spin_lock_irqsave(&guc->submission_state.lock, flags);
4794 guc->submission_state.reset_fail_mask |= engine->mask;
4795 spin_unlock_irqrestore(&guc->submission_state.lock, flags);
4798 * A GT reset flushes this worker queue (G2H handler) so we must use
4799 * another worker to trigger a GT reset.
4801 queue_work(system_unbound_wq, &guc->submission_state.reset_fail_worker);
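/*
 * With GuC submission the i915 does not know which context was executing on
 * a hung engine, so walk every registered context looking for one that is
 * pinned to that engine with an active request and report it as the hung
 * context.
 */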
4806 void intel_guc_find_hung_context(struct intel_engine_cs *engine)
4808 struct intel_guc *guc = &engine->gt->uc.guc;
4809 struct intel_context *ce;
4810 struct i915_request *rq;
4811 unsigned long index;
4812 unsigned long flags;
4814 /* Reset called during driver load? GuC not yet initialised! */
4815 if (unlikely(!guc_submission_initialized(guc)))
4818 xa_lock_irqsave(&guc->context_lookup, flags);
4819 xa_for_each(&guc->context_lookup, index, ce) {
4820 if (!kref_get_unless_zero(&ce->ref))
4823 xa_unlock(&guc->context_lookup);
4825 if (!intel_context_is_pinned(ce))
4828 if (intel_engine_is_virtual(ce->engine)) {
4829 if (!(ce->engine->mask & engine->mask))
4832 if (ce->engine != engine)
4836 list_for_each_entry(rq, &ce->guc_state.requests, sched.link) {
4837 if (i915_test_request_state(rq) != I915_REQUEST_ACTIVE)
4840 intel_engine_set_hung_context(engine, ce);
4842 /* Can only cope with one hang at a time... */
4843 intel_context_put(ce);
4844 xa_lock(&guc->context_lookup);
4848 intel_context_put(ce);
4849 xa_lock(&guc->context_lookup);
4852 xa_unlock_irqrestore(&guc->context_lookup, flags);
4855 void intel_guc_dump_active_requests(struct intel_engine_cs *engine,
4856 struct i915_request *hung_rq,
4857 struct drm_printer *m)
4859 struct intel_guc *guc = &engine->gt->uc.guc;
4860 struct intel_context *ce;
4861 unsigned long index;
4862 unsigned long flags;
4864 /* Reset called during driver load? GuC not yet initialised! */
4865 if (unlikely(!guc_submission_initialized(guc)))
4868 xa_lock_irqsave(&guc->context_lookup, flags);
4869 xa_for_each(&guc->context_lookup, index, ce) {
4870 if (!kref_get_unless_zero(&ce->ref))
4873 xa_unlock(&guc->context_lookup);
4875 if (!intel_context_is_pinned(ce))
4878 if (intel_engine_is_virtual(ce->engine)) {
4879 if (!(ce->engine->mask & engine->mask))
4882 if (ce->engine != engine)
4886 spin_lock(&ce->guc_state.lock);
4887 intel_engine_dump_active_requests(&ce->guc_state.requests,
4889 spin_unlock(&ce->guc_state.lock);
4892 intel_context_put(ce);
4893 xa_lock(&guc->context_lookup);
4895 xa_unlock_irqrestore(&guc->context_lookup, flags);
4898 void intel_guc_submission_print_info(struct intel_guc *guc,
4899 struct drm_printer *p)
4901 struct i915_sched_engine *sched_engine = guc->sched_engine;
4903 unsigned long flags;
4908 drm_printf(p, "GuC Number Outstanding Submission G2H: %u\n",
4909 atomic_read(&guc->outstanding_submission_g2h));
4910 drm_printf(p, "GuC tasklet count: %u\n",
4911 atomic_read(&sched_engine->tasklet.count));
4913 spin_lock_irqsave(&sched_engine->lock, flags);
4914 drm_printf(p, "Requests in GuC submit tasklet:\n");
4915 for (rb = rb_first_cached(&sched_engine->queue); rb; rb = rb_next(rb)) {
4916 struct i915_priolist *pl = to_priolist(rb);
4917 struct i915_request *rq;
4919 priolist_for_each_request(rq, pl)
4920 drm_printf(p, "guc_id=%u, seqno=%llu\n",
4921 rq->context->guc_id.id,
4924 spin_unlock_irqrestore(&sched_engine->lock, flags);
4925 drm_printf(p, "\n");
4928 static inline void guc_log_context_priority(struct drm_printer *p,
4929 struct intel_context *ce)
4933 drm_printf(p, "\t\tPriority: %d\n", ce->guc_state.prio);
4934 drm_printf(p, "\t\tNumber Requests (lower index == higher priority)\n");
4935 for (i = GUC_CLIENT_PRIORITY_KMD_HIGH;
4936 i < GUC_CLIENT_PRIORITY_NUM; ++i) {
4937 drm_printf(p, "\t\tNumber requests in priority band[%d]: %d\n",
4938 i, ce->guc_state.prio_count[i]);
4940 drm_printf(p, "\n");
4943 static inline void guc_log_context(struct drm_printer *p,
4944 struct intel_context *ce)
4946 drm_printf(p, "GuC lrc descriptor %u:\n", ce->guc_id.id);
4947 drm_printf(p, "\tHW Context Desc: 0x%08x\n", ce->lrc.lrca);
4948 drm_printf(p, "\t\tLRC Head: Internal %u, Memory %u\n",
4950 ce->lrc_reg_state[CTX_RING_HEAD]);
4951 drm_printf(p, "\t\tLRC Tail: Internal %u, Memory %u\n",
4953 ce->lrc_reg_state[CTX_RING_TAIL]);
4954 drm_printf(p, "\t\tContext Pin Count: %u\n",
4955 atomic_read(&ce->pin_count));
4956 drm_printf(p, "\t\tGuC ID Ref Count: %u\n",
4957 atomic_read(&ce->guc_id.ref));
4958 drm_printf(p, "\t\tSchedule State: 0x%x\n",
4959 ce->guc_state.sched_state);
4962 void intel_guc_submission_print_context_info(struct intel_guc *guc,
4963 struct drm_printer *p)
4965 struct intel_context *ce;
4966 unsigned long index;
4967 unsigned long flags;
4969 xa_lock_irqsave(&guc->context_lookup, flags);
4970 xa_for_each(&guc->context_lookup, index, ce) {
4971 GEM_BUG_ON(intel_context_is_child(ce));
4973 guc_log_context(p, ce);
4974 guc_log_context_priority(p, ce);
4976 if (intel_context_is_parent(ce)) {
4977 struct intel_context *child;
4979 drm_printf(p, "\t\tNumber children: %u\n",
4980 ce->parallel.number_children);
4982 if (ce->parallel.guc.wq_status) {
4983 drm_printf(p, "\t\tWQI Head: %u\n",
4984 READ_ONCE(*ce->parallel.guc.wq_head));
4985 drm_printf(p, "\t\tWQI Tail: %u\n",
4986 READ_ONCE(*ce->parallel.guc.wq_tail));
4987 drm_printf(p, "\t\tWQI Status: %u\n",
4988 READ_ONCE(*ce->parallel.guc.wq_status));
4991 if (ce->engine->emit_bb_start ==
4992 emit_bb_start_parent_no_preempt_mid_batch) {
4995 drm_printf(p, "\t\tChildren Go: %u\n",
4996 get_children_go_value(ce));
4997 for (i = 0; i < ce->parallel.number_children; ++i)
4998 drm_printf(p, "\t\tChildren Join: %u\n",
4999 get_children_join_value(ce, i));
5002 for_each_child(ce, child)
5003 guc_log_context(p, child);
5006 xa_unlock_irqrestore(&guc->context_lookup, flags);
5009 static inline u32 get_children_go_addr(struct intel_context *ce)
5011 GEM_BUG_ON(!intel_context_is_parent(ce));
5013 return i915_ggtt_offset(ce->state) +
5014 __get_parent_scratch_offset(ce) +
5015 offsetof(struct parent_scratch, go.semaphore);
5018 static inline u32 get_children_join_addr(struct intel_context *ce,
5021 GEM_BUG_ON(!intel_context_is_parent(ce));
5023 return i915_ggtt_offset(ce->state) +
5024 __get_parent_scratch_offset(ce) +
5025 offsetof(struct parent_scratch, join[child_index].semaphore);
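/*
 * Handshake values and emitters for multi-LRC (parallel) submission: the
 * parent and children synchronise through semaphores in the parent's scratch
 * page so that all batches start with preemption disabled and only re-enable
 * it in their fini breadcrumbs, preventing the set from being preempted
 * mid-batch.
 */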
5028 #define PARENT_GO_BB 1
5029 #define PARENT_GO_FINI_BREADCRUMB 0
5030 #define CHILD_GO_BB 1
5031 #define CHILD_GO_FINI_BREADCRUMB 0
5032 static int emit_bb_start_parent_no_preempt_mid_batch(struct i915_request *rq,
5033 u64 offset, u32 len,
5034 const unsigned int flags)
5036 struct intel_context *ce = rq->context;
5040 GEM_BUG_ON(!intel_context_is_parent(ce));
5042 cs = intel_ring_begin(rq, 10 + 4 * ce->parallel.number_children);
5046 /* Wait on children */
5047 for (i = 0; i < ce->parallel.number_children; ++i) {
5048 *cs++ = (MI_SEMAPHORE_WAIT |
5049 MI_SEMAPHORE_GLOBAL_GTT |
5051 MI_SEMAPHORE_SAD_EQ_SDD);
5052 *cs++ = PARENT_GO_BB;
5053 *cs++ = get_children_join_addr(ce, i);
5057 /* Turn off preemption */
5058 *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
5061 /* Tell children go */
5062 cs = gen8_emit_ggtt_write(cs,
5064 get_children_go_addr(ce),
5068 *cs++ = MI_BATCH_BUFFER_START_GEN8 |
5069 (flags & I915_DISPATCH_SECURE ? 0 : BIT(8));
5070 *cs++ = lower_32_bits(offset);
5071 *cs++ = upper_32_bits(offset);
5074 intel_ring_advance(rq, cs);
5079 static int emit_bb_start_child_no_preempt_mid_batch(struct i915_request *rq,
5080 u64 offset, u32 len,
5081 const unsigned int flags)
5083 struct intel_context *ce = rq->context;
5084 struct intel_context *parent = intel_context_to_parent(ce);
5087 GEM_BUG_ON(!intel_context_is_child(ce));
5089 cs = intel_ring_begin(rq, 12);
5094 cs = gen8_emit_ggtt_write(cs,
5096 get_children_join_addr(parent,
5097 ce->parallel.child_index),
5100 /* Wait on parent for go */
5101 *cs++ = (MI_SEMAPHORE_WAIT |
5102 MI_SEMAPHORE_GLOBAL_GTT |
5104 MI_SEMAPHORE_SAD_EQ_SDD);
5105 *cs++ = CHILD_GO_BB;
5106 *cs++ = get_children_go_addr(parent);
5109 /* Turn off preemption */
5110 *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
5113 *cs++ = MI_BATCH_BUFFER_START_GEN8 |
5114 (flags & I915_DISPATCH_SECURE ? 0 : BIT(8));
5115 *cs++ = lower_32_bits(offset);
5116 *cs++ = upper_32_bits(offset);
5118 intel_ring_advance(rq, cs);
5124 __emit_fini_breadcrumb_parent_no_preempt_mid_batch(struct i915_request *rq,
5127 struct intel_context *ce = rq->context;
5130 GEM_BUG_ON(!intel_context_is_parent(ce));
5132 /* Wait on children */
5133 for (i = 0; i < ce->parallel.number_children; ++i) {
5134 *cs++ = (MI_SEMAPHORE_WAIT |
5135 MI_SEMAPHORE_GLOBAL_GTT |
5137 MI_SEMAPHORE_SAD_EQ_SDD);
5138 *cs++ = PARENT_GO_FINI_BREADCRUMB;
5139 *cs++ = get_children_join_addr(ce, i);
5143 /* Turn on preemption */
5144 *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
5147 /* Tell children go */
5148 cs = gen8_emit_ggtt_write(cs,
5149 CHILD_GO_FINI_BREADCRUMB,
5150 get_children_go_addr(ce),
5157 * If this is true, a submission of multi-lrc requests had an error and the
5158 * requests need to be skipped. The front end (execbuf IOCTL) should've called
5159 * i915_request_skip which squashes the BB, but we still need to emit the fini
5160 * breadcrumb seqno write. At this point we don't know how many of the
5161 * requests in the multi-lrc submission were generated so we can't do the
5162 * handshake between the parent and children (e.g. if 4 requests should be
5163 * generated but 2nd hit an error only 1 would be seen by the GuC backend).
5164 * Simply skip the handshake, but still emit the breadcrumb seqno, if an error
5165 * has occurred on any of the requests in submission / relationship.
5167 static inline bool skip_handshake(struct i915_request *rq)
5169 return test_bit(I915_FENCE_FLAG_SKIP_PARALLEL, &rq->fence.flags);
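/*
 * Number of dwords always emitted at the end of the fini breadcrumb, even
 * when the handshake is skipped: the seqno write, the user interrupt and
 * padding.
 */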
5172 #define NON_SKIP_LEN 6
5174 emit_fini_breadcrumb_parent_no_preempt_mid_batch(struct i915_request *rq,
5177 struct intel_context *ce = rq->context;
5178 __maybe_unused u32 *before_fini_breadcrumb_user_interrupt_cs;
5179 __maybe_unused u32 *start_fini_breadcrumb_cs = cs;
5181 GEM_BUG_ON(!intel_context_is_parent(ce));
5183 if (unlikely(skip_handshake(rq))) {
5185 * NOP everything in __emit_fini_breadcrumb_parent_no_preempt_mid_batch,
5186 * the NON_SKIP_LEN comes from the length of the emits below.
5188 memset(cs, 0, sizeof(u32) *
5189 (ce->engine->emit_fini_breadcrumb_dw - NON_SKIP_LEN));
5190 cs += ce->engine->emit_fini_breadcrumb_dw - NON_SKIP_LEN;
5192 cs = __emit_fini_breadcrumb_parent_no_preempt_mid_batch(rq, cs);
5195 /* Emit fini breadcrumb */
5196 before_fini_breadcrumb_user_interrupt_cs = cs;
5197 cs = gen8_emit_ggtt_write(cs,
5199 i915_request_active_timeline(rq)->hwsp_offset,
5202 /* User interrupt */
5203 *cs++ = MI_USER_INTERRUPT;
5206 /* Ensure our math for skip + emit is correct */
5207 GEM_BUG_ON(before_fini_breadcrumb_user_interrupt_cs + NON_SKIP_LEN !=
5209 GEM_BUG_ON(start_fini_breadcrumb_cs +
5210 ce->engine->emit_fini_breadcrumb_dw != cs);
5212 rq->tail = intel_ring_offset(rq, cs);
5218 __emit_fini_breadcrumb_child_no_preempt_mid_batch(struct i915_request *rq,
5221 struct intel_context *ce = rq->context;
5222 struct intel_context *parent = intel_context_to_parent(ce);
5224 GEM_BUG_ON(!intel_context_is_child(ce));
5226 /* Turn on preemption */
5227 *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
5231 cs = gen8_emit_ggtt_write(cs,
5232 PARENT_GO_FINI_BREADCRUMB,
5233 get_children_join_addr(parent,
5234 ce->parallel.child_index),
5237 /* Wait on parent for go */
5238 *cs++ = (MI_SEMAPHORE_WAIT |
5239 MI_SEMAPHORE_GLOBAL_GTT |
5241 MI_SEMAPHORE_SAD_EQ_SDD);
5242 *cs++ = CHILD_GO_FINI_BREADCRUMB;
5243 *cs++ = get_children_go_addr(parent);
5250 emit_fini_breadcrumb_child_no_preempt_mid_batch(struct i915_request *rq,
5253 struct intel_context *ce = rq->context;
5254 __maybe_unused u32 *before_fini_breadcrumb_user_interrupt_cs;
5255 __maybe_unused u32 *start_fini_breadcrumb_cs = cs;
5257 GEM_BUG_ON(!intel_context_is_child(ce));
5259 if (unlikely(skip_handshake(rq))) {
5261 * NOP everything in __emit_fini_breadcrumb_child_no_preempt_mid_batch,
5262 * the NON_SKIP_LEN comes from the length of the emits below.
5264 memset(cs, 0, sizeof(u32) *
5265 (ce->engine->emit_fini_breadcrumb_dw - NON_SKIP_LEN));
5266 cs += ce->engine->emit_fini_breadcrumb_dw - NON_SKIP_LEN;
5268 cs = __emit_fini_breadcrumb_child_no_preempt_mid_batch(rq, cs);
5271 /* Emit fini breadcrumb */
5272 before_fini_breadcrumb_user_interrupt_cs = cs;
5273 cs = gen8_emit_ggtt_write(cs,
5275 i915_request_active_timeline(rq)->hwsp_offset,
5278 /* User interrupt */
5279 *cs++ = MI_USER_INTERRUPT;
5282 /* Ensure our math for skip + emit is correct */
5283 GEM_BUG_ON(before_fini_breadcrumb_user_interrupt_cs + NON_SKIP_LEN !=
5285 GEM_BUG_ON(start_fini_breadcrumb_cs +
5286 ce->engine->emit_fini_breadcrumb_dw != cs);
5288 rq->tail = intel_ring_offset(rq, cs);
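/*
 * Create a virtual engine for GuC submission. The GuC performs the actual
 * load balancing between siblings, so this is largely a software construct
 * that aggregates the siblings' masks and properties and borrows the first
 * sibling's emitters.
 */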
5295 static struct intel_context *
5296 guc_create_virtual(struct intel_engine_cs **siblings, unsigned int count,
5297 unsigned long flags)
5299 struct guc_virtual_engine *ve;
5300 struct intel_guc *guc;
5304 ve = kzalloc(sizeof(*ve), GFP_KERNEL);
5306 return ERR_PTR(-ENOMEM);
5308 guc = &siblings[0]->gt->uc.guc;
5310 ve->base.i915 = siblings[0]->i915;
5311 ve->base.gt = siblings[0]->gt;
5312 ve->base.uncore = siblings[0]->uncore;
5315 ve->base.uabi_class = I915_ENGINE_CLASS_INVALID;
5316 ve->base.instance = I915_ENGINE_CLASS_INVALID_VIRTUAL;
5317 ve->base.uabi_instance = I915_ENGINE_CLASS_INVALID_VIRTUAL;
5318 ve->base.saturated = ALL_ENGINES;
5320 snprintf(ve->base.name, sizeof(ve->base.name), "virtual");
5322 ve->base.sched_engine = i915_sched_engine_get(guc->sched_engine);
5324 ve->base.cops = &virtual_guc_context_ops;
5325 ve->base.request_alloc = guc_request_alloc;
5326 ve->base.bump_serial = virtual_guc_bump_serial;
5328 ve->base.submit_request = guc_submit_request;
5330 ve->base.flags = I915_ENGINE_IS_VIRTUAL;
5332 intel_context_init(&ve->context, &ve->base);
5334 for (n = 0; n < count; n++) {
5335 struct intel_engine_cs *sibling = siblings[n];
5337 GEM_BUG_ON(!is_power_of_2(sibling->mask));
5338 if (sibling->mask & ve->base.mask) {
5339 DRM_DEBUG("duplicate %s entry in load balancer\n",
5345 ve->base.mask |= sibling->mask;
5346 ve->base.logical_mask |= sibling->logical_mask;
5348 if (n != 0 && ve->base.class != sibling->class) {
5349 DRM_DEBUG("invalid mixing of engine class, sibling %d, already %d\n",
5350 sibling->class, ve->base.class);
5353 } else if (n == 0) {
5354 ve->base.class = sibling->class;
5355 ve->base.uabi_class = sibling->uabi_class;
5356 snprintf(ve->base.name, sizeof(ve->base.name),
5357 "v%dx%d", ve->base.class, count);
5358 ve->base.context_size = sibling->context_size;
5360 ve->base.add_active_request =
5361 sibling->add_active_request;
5362 ve->base.remove_active_request =
5363 sibling->remove_active_request;
5364 ve->base.emit_bb_start = sibling->emit_bb_start;
5365 ve->base.emit_flush = sibling->emit_flush;
5366 ve->base.emit_init_breadcrumb =
5367 sibling->emit_init_breadcrumb;
5368 ve->base.emit_fini_breadcrumb =
5369 sibling->emit_fini_breadcrumb;
5370 ve->base.emit_fini_breadcrumb_dw =
5371 sibling->emit_fini_breadcrumb_dw;
5372 ve->base.breadcrumbs =
5373 intel_breadcrumbs_get(sibling->breadcrumbs);
5375 ve->base.flags |= sibling->flags;
5377 ve->base.props.timeslice_duration_ms =
5378 sibling->props.timeslice_duration_ms;
5379 ve->base.props.preempt_timeout_ms =
5380 sibling->props.preempt_timeout_ms;
5384 return &ve->context;
5387 intel_context_put(&ve->context);
5388 return ERR_PTR(err);
5391 bool intel_guc_virtual_engine_has_heartbeat(const struct intel_engine_cs *ve)
5393 struct intel_engine_cs *engine;
5394 intel_engine_mask_t tmp, mask = ve->mask;
5396 for_each_engine_masked(engine, ve->gt, mask, tmp)
5397 if (READ_ONCE(engine->props.heartbeat_interval_ms))
return true;

return false;
5403 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
5404 #include "selftest_guc.c"
5405 #include "selftest_guc_multi_lrc.c"
5406 #include "selftest_guc_hangcheck.c"