// SPDX-License-Identifier: MIT
/*
 * Copyright © 2022 Intel Corporation
 */

#include "xe_guc_submit.h"

#include <linux/bitfield.h>
#include <linux/bitmap.h>
#include <linux/circ_buf.h>
#include <linux/delay.h>
#include <linux/dma-fence-array.h>

#include <drm/drm_managed.h>

#include "abi/guc_actions_abi.h"
#include "abi/guc_klvs_abi.h"
#include "regs/xe_lrc_layout.h"
#include "xe_assert.h"
#include "xe_devcoredump.h"
#include "xe_device.h"
#include "xe_exec_queue.h"
#include "xe_force_wake.h"
#include "xe_gpu_scheduler.h"
#include "xe_gt.h"
#include "xe_guc.h"
#include "xe_guc_ct.h"
#include "xe_guc_exec_queue_types.h"
#include "xe_guc_submit_types.h"
#include "xe_hw_engine.h"
#include "xe_hw_fence.h"
#include "xe_lrc.h"
#include "xe_macros.h"
#include "xe_map.h"
#include "xe_mocs.h"
#include "xe_ring_ops_types.h"
#include "xe_sched_job.h"
#include "xe_trace.h"
#include "xe_vm.h"

static struct xe_guc *
exec_queue_to_guc(struct xe_exec_queue *q)
{
	return &q->gt->uc.guc;
}

/*
 * Helpers for engine state, using an atomic as some of the bits can transition
 * at the same time (e.g. a suspend can be happening at the same time as a
 * schedule engine done message is being processed).
 */
#define EXEC_QUEUE_STATE_REGISTERED		(1 << 0)
#define ENGINE_STATE_ENABLED			(1 << 1)
#define EXEC_QUEUE_STATE_PENDING_ENABLE		(1 << 2)
#define EXEC_QUEUE_STATE_PENDING_DISABLE	(1 << 3)
#define EXEC_QUEUE_STATE_DESTROYED		(1 << 4)
#define ENGINE_STATE_SUSPENDED			(1 << 5)
#define EXEC_QUEUE_STATE_RESET			(1 << 6)
#define ENGINE_STATE_KILLED			(1 << 7)
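
/*
 * Illustrative only: several of these bits can legitimately be set at once.
 * A queue being suspended while its scheduling-enable G2H is still in flight
 * could, for example, read back as
 *
 *	EXEC_QUEUE_STATE_REGISTERED | ENGINE_STATE_ENABLED |
 *	EXEC_QUEUE_STATE_PENDING_ENABLE | ENGINE_STATE_SUSPENDED
 *
 * which is why the accessors below use atomic_or()/atomic_and() on a single
 * atomic_t rather than non-atomic read-modify-write.
 */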

static bool exec_queue_registered(struct xe_exec_queue *q)
{
	return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_REGISTERED;
}

static void set_exec_queue_registered(struct xe_exec_queue *q)
{
	atomic_or(EXEC_QUEUE_STATE_REGISTERED, &q->guc->state);
}

static void clear_exec_queue_registered(struct xe_exec_queue *q)
{
	atomic_and(~EXEC_QUEUE_STATE_REGISTERED, &q->guc->state);
}

static bool exec_queue_enabled(struct xe_exec_queue *q)
{
	return atomic_read(&q->guc->state) & ENGINE_STATE_ENABLED;
}

static void set_exec_queue_enabled(struct xe_exec_queue *q)
{
	atomic_or(ENGINE_STATE_ENABLED, &q->guc->state);
}

static void clear_exec_queue_enabled(struct xe_exec_queue *q)
{
	atomic_and(~ENGINE_STATE_ENABLED, &q->guc->state);
}

static bool exec_queue_pending_enable(struct xe_exec_queue *q)
{
	return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_PENDING_ENABLE;
}

static void set_exec_queue_pending_enable(struct xe_exec_queue *q)
{
	atomic_or(EXEC_QUEUE_STATE_PENDING_ENABLE, &q->guc->state);
}

static void clear_exec_queue_pending_enable(struct xe_exec_queue *q)
{
	atomic_and(~EXEC_QUEUE_STATE_PENDING_ENABLE, &q->guc->state);
}

static bool exec_queue_pending_disable(struct xe_exec_queue *q)
{
	return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_PENDING_DISABLE;
}

static void set_exec_queue_pending_disable(struct xe_exec_queue *q)
{
	atomic_or(EXEC_QUEUE_STATE_PENDING_DISABLE, &q->guc->state);
}

static void clear_exec_queue_pending_disable(struct xe_exec_queue *q)
{
	atomic_and(~EXEC_QUEUE_STATE_PENDING_DISABLE, &q->guc->state);
}

static bool exec_queue_destroyed(struct xe_exec_queue *q)
{
	return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_DESTROYED;
}

static void set_exec_queue_destroyed(struct xe_exec_queue *q)
{
	atomic_or(EXEC_QUEUE_STATE_DESTROYED, &q->guc->state);
}

static bool exec_queue_banned(struct xe_exec_queue *q)
{
	return (q->flags & EXEC_QUEUE_FLAG_BANNED);
}

static void set_exec_queue_banned(struct xe_exec_queue *q)
{
	q->flags |= EXEC_QUEUE_FLAG_BANNED;
}

static bool exec_queue_suspended(struct xe_exec_queue *q)
{
	return atomic_read(&q->guc->state) & ENGINE_STATE_SUSPENDED;
}

static void set_exec_queue_suspended(struct xe_exec_queue *q)
{
	atomic_or(ENGINE_STATE_SUSPENDED, &q->guc->state);
}

static void clear_exec_queue_suspended(struct xe_exec_queue *q)
{
	atomic_and(~ENGINE_STATE_SUSPENDED, &q->guc->state);
}

static bool exec_queue_reset(struct xe_exec_queue *q)
{
	return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_RESET;
}

static void set_exec_queue_reset(struct xe_exec_queue *q)
{
	atomic_or(EXEC_QUEUE_STATE_RESET, &q->guc->state);
}

static bool exec_queue_killed(struct xe_exec_queue *q)
{
	return atomic_read(&q->guc->state) & ENGINE_STATE_KILLED;
}

static void set_exec_queue_killed(struct xe_exec_queue *q)
{
	atomic_or(ENGINE_STATE_KILLED, &q->guc->state);
}

static bool exec_queue_killed_or_banned(struct xe_exec_queue *q)
{
	return exec_queue_killed(q) || exec_queue_banned(q);
}

#ifdef CONFIG_PROVE_LOCKING
static int alloc_submit_wq(struct xe_guc *guc)
{
	int i;

	for (i = 0; i < NUM_SUBMIT_WQ; ++i) {
		guc->submission_state.submit_wq_pool[i] =
			alloc_ordered_workqueue("submit_wq", 0);
		if (!guc->submission_state.submit_wq_pool[i])
			goto err_free;
	}

	return 0;

err_free:
	while (i)
		destroy_workqueue(guc->submission_state.submit_wq_pool[--i]);

	return -ENOMEM;
}

static void free_submit_wq(struct xe_guc *guc)
{
	int i;

	for (i = 0; i < NUM_SUBMIT_WQ; ++i)
		destroy_workqueue(guc->submission_state.submit_wq_pool[i]);
}

static struct workqueue_struct *get_submit_wq(struct xe_guc *guc)
{
	int idx = guc->submission_state.submit_wq_idx++ % NUM_SUBMIT_WQ;

	return guc->submission_state.submit_wq_pool[idx];
}
#else
static int alloc_submit_wq(struct xe_guc *guc)
{
	return 0;
}

static void free_submit_wq(struct xe_guc *guc)
{
}

static struct workqueue_struct *get_submit_wq(struct xe_guc *guc)
{
	return NULL;
}
#endif
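
/*
 * A sketch of the rationale (inferred from the CONFIG_PROVE_LOCKING guard, not
 * stated in this file): with one shared ordered workqueue, lockdep would fold
 * every scheduler's submit work into a single lock class and could report
 * false-positive dependency cycles. Handing out queues round-robin, e.g.
 *
 *	sched_a wq = get_submit_wq(guc);	// submit_wq_pool[0]
 *	sched_b wq = get_submit_wq(guc);	// submit_wq_pool[1]
 *
 * spreads schedulers across NUM_SUBMIT_WQ distinct lockdep keys.
 */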

static void guc_submit_fini(struct drm_device *drm, void *arg)
{
	struct xe_guc *guc = arg;

	xa_destroy(&guc->submission_state.exec_queue_lookup);
	ida_destroy(&guc->submission_state.guc_ids);
	bitmap_free(guc->submission_state.guc_ids_bitmap);
	free_submit_wq(guc);
	mutex_destroy(&guc->submission_state.lock);
}

#define GUC_ID_MAX		65535
#define GUC_ID_NUMBER_MLRC	4096
#define GUC_ID_NUMBER_SLRC	(GUC_ID_MAX - GUC_ID_NUMBER_MLRC)
#define GUC_ID_START_MLRC	GUC_ID_NUMBER_SLRC
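
/*
 * Worked example of the split: GUC_ID_NUMBER_SLRC = 65535 - 4096 = 61439, so
 * single-LRC queues draw IDA values from [0, 61439) while parallel queues
 * reserve a bitmap region of order_base_2(width) IDs offset by
 * GUC_ID_START_MLRC, e.g. a width-3 queue takes a 4-ID region within
 * [61439, 65535).
 */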

static const struct xe_exec_queue_ops guc_exec_queue_ops;

static void primelockdep(struct xe_guc *guc)
{
	if (!IS_ENABLED(CONFIG_LOCKDEP))
		return;

	fs_reclaim_acquire(GFP_KERNEL);

	mutex_lock(&guc->submission_state.lock);
	might_lock(&guc->submission_state.suspend.lock);
	mutex_unlock(&guc->submission_state.lock);

	fs_reclaim_release(GFP_KERNEL);
}

int xe_guc_submit_init(struct xe_guc *guc)
{
	struct xe_device *xe = guc_to_xe(guc);
	struct xe_gt *gt = guc_to_gt(guc);
	int err;

	guc->submission_state.guc_ids_bitmap =
		bitmap_zalloc(GUC_ID_NUMBER_MLRC, GFP_KERNEL);
	if (!guc->submission_state.guc_ids_bitmap)
		return -ENOMEM;

	err = alloc_submit_wq(guc);
	if (err) {
		bitmap_free(guc->submission_state.guc_ids_bitmap);
		return err;
	}

	gt->exec_queue_ops = &guc_exec_queue_ops;

	mutex_init(&guc->submission_state.lock);
	xa_init(&guc->submission_state.exec_queue_lookup);
	ida_init(&guc->submission_state.guc_ids);

	spin_lock_init(&guc->submission_state.suspend.lock);
	guc->submission_state.suspend.context = dma_fence_context_alloc(1);

	primelockdep(guc);

	err = drmm_add_action_or_reset(&xe->drm, guc_submit_fini, guc);
	if (err)
		return err;

	return 0;
}

static void __release_guc_id(struct xe_guc *guc, struct xe_exec_queue *q, u32 xa_count)
{
	int i;

	lockdep_assert_held(&guc->submission_state.lock);

	for (i = 0; i < xa_count; ++i)
		xa_erase(&guc->submission_state.exec_queue_lookup, q->guc->id + i);

	if (xe_exec_queue_is_parallel(q))
		bitmap_release_region(guc->submission_state.guc_ids_bitmap,
				      q->guc->id - GUC_ID_START_MLRC,
				      order_base_2(q->width));
	else
		ida_simple_remove(&guc->submission_state.guc_ids, q->guc->id);
}

static int alloc_guc_id(struct xe_guc *guc, struct xe_exec_queue *q)
{
	int ret;
	void *ptr;
	int i;

	/*
	 * Must use GFP_NOWAIT as this lock is in the dma fence signalling path,
	 * worst case user gets -ENOMEM on engine create and has to try again.
	 *
	 * FIXME: Have caller pre-alloc or post-alloc /w GFP_KERNEL to prevent
	 * failure.
	 */
	lockdep_assert_held(&guc->submission_state.lock);

	if (xe_exec_queue_is_parallel(q)) {
		void *bitmap = guc->submission_state.guc_ids_bitmap;

		ret = bitmap_find_free_region(bitmap, GUC_ID_NUMBER_MLRC,
					      order_base_2(q->width));
	} else {
		ret = ida_simple_get(&guc->submission_state.guc_ids, 0,
				     GUC_ID_NUMBER_SLRC, GFP_NOWAIT);
	}
	if (ret < 0)
		return ret;

	q->guc->id = ret;
	if (xe_exec_queue_is_parallel(q))
		q->guc->id += GUC_ID_START_MLRC;

	for (i = 0; i < q->width; ++i) {
		ptr = xa_store(&guc->submission_state.exec_queue_lookup,
			       q->guc->id + i, q, GFP_NOWAIT);
		if (IS_ERR(ptr)) {
			ret = PTR_ERR(ptr);
			goto err_release;
		}
	}

	return 0;

err_release:
	__release_guc_id(guc, q, i);

	return ret;
}

static void release_guc_id(struct xe_guc *guc, struct xe_exec_queue *q)
{
	mutex_lock(&guc->submission_state.lock);
	__release_guc_id(guc, q, q->width);
	mutex_unlock(&guc->submission_state.lock);
}

struct exec_queue_policy {
	u32 count;
	struct guc_update_exec_queue_policy h2g;
};

static u32 __guc_exec_queue_policy_action_size(struct exec_queue_policy *policy)
{
	size_t bytes = sizeof(policy->h2g.header) +
		       (sizeof(policy->h2g.klv[0]) * policy->count);

	return bytes / sizeof(u32);
}

static void __guc_exec_queue_policy_start_klv(struct exec_queue_policy *policy,
					      u16 guc_id)
{
	policy->h2g.header.action =
		XE_GUC_ACTION_HOST2GUC_UPDATE_CONTEXT_POLICIES;
	policy->h2g.header.guc_id = guc_id;
	policy->count = 0;
}

#define MAKE_EXEC_QUEUE_POLICY_ADD(func, id) \
static void __guc_exec_queue_policy_add_##func(struct exec_queue_policy *policy, \
					       u32 data) \
{ \
	XE_WARN_ON(policy->count >= GUC_CONTEXT_POLICIES_KLV_NUM_IDS); \
\
	policy->h2g.klv[policy->count].kl = \
		FIELD_PREP(GUC_KLV_0_KEY, \
			   GUC_CONTEXT_POLICIES_KLV_ID_##id) | \
		FIELD_PREP(GUC_KLV_0_LEN, 1); \
	policy->h2g.klv[policy->count].value = data; \
	policy->count++; \
}

MAKE_EXEC_QUEUE_POLICY_ADD(execution_quantum, EXECUTION_QUANTUM)
MAKE_EXEC_QUEUE_POLICY_ADD(preemption_timeout, PREEMPTION_TIMEOUT)
MAKE_EXEC_QUEUE_POLICY_ADD(priority, SCHEDULING_PRIORITY)
#undef MAKE_EXEC_QUEUE_POLICY_ADD
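
/*
 * For reference, the priority instantiation above expands to roughly (hand
 * expanded, not verbatim preprocessor output):
 *
 *	static void __guc_exec_queue_policy_add_priority(struct exec_queue_policy *policy,
 *							 u32 data)
 *	{
 *		policy->h2g.klv[policy->count].kl =
 *			FIELD_PREP(GUC_KLV_0_KEY,
 *				   GUC_CONTEXT_POLICIES_KLV_ID_SCHEDULING_PRIORITY) |
 *			FIELD_PREP(GUC_KLV_0_LEN, 1);
 *		policy->h2g.klv[policy->count].value = data;
 *		policy->count++;
 *	}
 */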

static const int xe_exec_queue_prio_to_guc[] = {
	[XE_EXEC_QUEUE_PRIORITY_LOW] = GUC_CLIENT_PRIORITY_NORMAL,
	[XE_EXEC_QUEUE_PRIORITY_NORMAL] = GUC_CLIENT_PRIORITY_KMD_NORMAL,
	[XE_EXEC_QUEUE_PRIORITY_HIGH] = GUC_CLIENT_PRIORITY_HIGH,
	[XE_EXEC_QUEUE_PRIORITY_KERNEL] = GUC_CLIENT_PRIORITY_KMD_HIGH,
};

static void init_policies(struct xe_guc *guc, struct xe_exec_queue *q)
{
	struct exec_queue_policy policy;
	struct xe_device *xe = guc_to_xe(guc);
	enum xe_exec_queue_priority prio = q->sched_props.priority;
	u32 timeslice_us = q->sched_props.timeslice_us;
	u32 preempt_timeout_us = q->sched_props.preempt_timeout_us;

	xe_assert(xe, exec_queue_registered(q));

	__guc_exec_queue_policy_start_klv(&policy, q->guc->id);
	__guc_exec_queue_policy_add_priority(&policy, xe_exec_queue_prio_to_guc[prio]);
	__guc_exec_queue_policy_add_execution_quantum(&policy, timeslice_us);
	__guc_exec_queue_policy_add_preemption_timeout(&policy, preempt_timeout_us);

	xe_guc_ct_send(&guc->ct, (u32 *)&policy.h2g,
		       __guc_exec_queue_policy_action_size(&policy), 0, 0);
}

static void set_min_preemption_timeout(struct xe_guc *guc, struct xe_exec_queue *q)
{
	struct exec_queue_policy policy;

	__guc_exec_queue_policy_start_klv(&policy, q->guc->id);
	__guc_exec_queue_policy_add_preemption_timeout(&policy, 1);

	xe_guc_ct_send(&guc->ct, (u32 *)&policy.h2g,
		       __guc_exec_queue_policy_action_size(&policy), 0, 0);
}

#define parallel_read(xe_, map_, field_) \
	xe_map_rd_field(xe_, &map_, 0, struct guc_submit_parallel_scratch, \
			field_)
#define parallel_write(xe_, map_, field_, val_) \
	xe_map_wr_field(xe_, &map_, 0, struct guc_submit_parallel_scratch, \
			field_, val_)

static void __register_mlrc_engine(struct xe_guc *guc,
				   struct xe_exec_queue *q,
				   struct guc_ctxt_registration_info *info)
{
#define MAX_MLRC_REG_SIZE      (13 + XE_HW_ENGINE_MAX_INSTANCE * 2)
	struct xe_device *xe = guc_to_xe(guc);
	u32 action[MAX_MLRC_REG_SIZE];
	int len = 0;
	int i;

	xe_assert(xe, xe_exec_queue_is_parallel(q));

	action[len++] = XE_GUC_ACTION_REGISTER_CONTEXT_MULTI_LRC;
	action[len++] = info->flags;
	action[len++] = info->context_idx;
	action[len++] = info->engine_class;
	action[len++] = info->engine_submit_mask;
	action[len++] = info->wq_desc_lo;
	action[len++] = info->wq_desc_hi;
	action[len++] = info->wq_base_lo;
	action[len++] = info->wq_base_hi;
	action[len++] = info->wq_size;
	action[len++] = q->width;
	action[len++] = info->hwlrca_lo;
	action[len++] = info->hwlrca_hi;

	for (i = 1; i < q->width; ++i) {
		struct xe_lrc *lrc = q->lrc + i;

		action[len++] = lower_32_bits(xe_lrc_descriptor(lrc));
		action[len++] = upper_32_bits(xe_lrc_descriptor(lrc));
	}

	xe_assert(xe, len <= MAX_MLRC_REG_SIZE);
#undef MAX_MLRC_REG_SIZE

	xe_guc_ct_send(&guc->ct, action, len, 0, 0);
}

static void __register_engine(struct xe_guc *guc,
			      struct guc_ctxt_registration_info *info)
{
	u32 action[] = {
		XE_GUC_ACTION_REGISTER_CONTEXT,
		info->flags,
		info->context_idx,
		info->engine_class,
		info->engine_submit_mask,
		info->wq_desc_lo,
		info->wq_desc_hi,
		info->wq_base_lo,
		info->wq_base_hi,
		info->wq_size,
		info->hwlrca_lo,
		info->hwlrca_hi,
	};

	xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action), 0, 0);
}

static void register_engine(struct xe_exec_queue *q)
{
	struct xe_guc *guc = exec_queue_to_guc(q);
	struct xe_device *xe = guc_to_xe(guc);
	struct xe_lrc *lrc = q->lrc;
	struct guc_ctxt_registration_info info;

	xe_assert(xe, !exec_queue_registered(q));

	memset(&info, 0, sizeof(info));
	info.context_idx = q->guc->id;
	info.engine_class = xe_engine_class_to_guc_class(q->class);
	info.engine_submit_mask = q->logical_mask;
	info.hwlrca_lo = lower_32_bits(xe_lrc_descriptor(lrc));
	info.hwlrca_hi = upper_32_bits(xe_lrc_descriptor(lrc));
	info.flags = CONTEXT_REGISTRATION_FLAG_KMD;

	if (xe_exec_queue_is_parallel(q)) {
		u32 ggtt_addr = xe_lrc_parallel_ggtt_addr(lrc);
		struct iosys_map map = xe_lrc_parallel_map(lrc);

		info.wq_desc_lo = lower_32_bits(ggtt_addr +
			offsetof(struct guc_submit_parallel_scratch, wq_desc));
		info.wq_desc_hi = upper_32_bits(ggtt_addr +
			offsetof(struct guc_submit_parallel_scratch, wq_desc));
		info.wq_base_lo = lower_32_bits(ggtt_addr +
			offsetof(struct guc_submit_parallel_scratch, wq[0]));
		info.wq_base_hi = upper_32_bits(ggtt_addr +
			offsetof(struct guc_submit_parallel_scratch, wq[0]));
		info.wq_size = WQ_SIZE;

		q->guc->wqi_head = 0;
		q->guc->wqi_tail = 0;
		xe_map_memset(xe, &map, 0, 0, PARALLEL_SCRATCH_SIZE - WQ_SIZE);
		parallel_write(xe, map, wq_desc.wq_status, WQ_STATUS_ACTIVE);
	}

	/*
	 * We must keep a reference for LR engines while they are registered
	 * with the GuC: their jobs signal immediately, and we can't destroy an
	 * engine the GuC still holds a reference to.
	 */
	if (xe_exec_queue_is_lr(q))
		xe_exec_queue_get(q);

	set_exec_queue_registered(q);
	trace_xe_exec_queue_register(q);
	if (xe_exec_queue_is_parallel(q))
		__register_mlrc_engine(guc, q, &info);
	else
		__register_engine(guc, &info);
	init_policies(guc, q);
}

static u32 wq_space_until_wrap(struct xe_exec_queue *q)
{
	return (WQ_SIZE - q->guc->wqi_tail);
}

static int wq_wait_for_space(struct xe_exec_queue *q, u32 wqi_size)
{
	struct xe_guc *guc = exec_queue_to_guc(q);
	struct xe_device *xe = guc_to_xe(guc);
	struct iosys_map map = xe_lrc_parallel_map(q->lrc);
	unsigned int sleep_period_ms = 1;

#define AVAILABLE_SPACE \
	CIRC_SPACE(q->guc->wqi_tail, q->guc->wqi_head, WQ_SIZE)
	if (wqi_size > AVAILABLE_SPACE) {
try_again:
		q->guc->wqi_head = parallel_read(xe, map, wq_desc.head);
		if (wqi_size > AVAILABLE_SPACE) {
			if (sleep_period_ms == 1024) {
				xe_gt_reset_async(q->gt);
				return -ENODEV;
			}

			msleep(sleep_period_ms);
			sleep_period_ms <<= 1;
			goto try_again;
		}
	}
#undef AVAILABLE_SPACE

	return 0;
}
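
/*
 * Backoff arithmetic for the wait above: sleep_period_ms doubles from 1 ms,
 * so a stuck work queue sleeps 1 + 2 + ... + 512 ms (~1s total) before the
 * sleep_period_ms == 1024 check gives up and kicks off a GT reset.
 */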

static int wq_noop_append(struct xe_exec_queue *q)
{
	struct xe_guc *guc = exec_queue_to_guc(q);
	struct xe_device *xe = guc_to_xe(guc);
	struct iosys_map map = xe_lrc_parallel_map(q->lrc);
	u32 len_dw = wq_space_until_wrap(q) / sizeof(u32) - 1;

	if (wq_wait_for_space(q, wq_space_until_wrap(q)))
		return -ENODEV;

	xe_assert(xe, FIELD_FIT(WQ_LEN_MASK, len_dw));

	parallel_write(xe, map, wq[q->guc->wqi_tail / sizeof(u32)],
		       FIELD_PREP(WQ_TYPE_MASK, WQ_TYPE_NOOP) |
		       FIELD_PREP(WQ_LEN_MASK, len_dw));
	q->guc->wqi_tail = 0;

	return 0;
}

static void wq_item_append(struct xe_exec_queue *q)
{
	struct xe_guc *guc = exec_queue_to_guc(q);
	struct xe_device *xe = guc_to_xe(guc);
	struct iosys_map map = xe_lrc_parallel_map(q->lrc);
#define WQ_HEADER_SIZE	4	/* Includes 1 LRC address too */
	u32 wqi[XE_HW_ENGINE_MAX_INSTANCE + (WQ_HEADER_SIZE - 1)];
	u32 wqi_size = (q->width + (WQ_HEADER_SIZE - 1)) * sizeof(u32);
	u32 len_dw = (wqi_size / sizeof(u32)) - 1;
	int i = 0, j;

	if (wqi_size > wq_space_until_wrap(q)) {
		if (wq_noop_append(q))
			return;
	}
	if (wq_wait_for_space(q, wqi_size))
		return;

	wqi[i++] = FIELD_PREP(WQ_TYPE_MASK, WQ_TYPE_MULTI_LRC) |
		FIELD_PREP(WQ_LEN_MASK, len_dw);
	wqi[i++] = xe_lrc_descriptor(q->lrc);
	wqi[i++] = FIELD_PREP(WQ_GUC_ID_MASK, q->guc->id) |
		FIELD_PREP(WQ_RING_TAIL_MASK, q->lrc->ring.tail / sizeof(u64));
	wqi[i++] = 0;
	for (j = 1; j < q->width; ++j) {
		struct xe_lrc *lrc = q->lrc + j;

		wqi[i++] = lrc->ring.tail / sizeof(u64);
	}

	xe_assert(xe, i == wqi_size / sizeof(u32));

	iosys_map_incr(&map, offsetof(struct guc_submit_parallel_scratch,
				      wq[q->guc->wqi_tail / sizeof(u32)]));
	xe_map_memcpy_to(xe, &map, 0, wqi, wqi_size);
	q->guc->wqi_tail += wqi_size;
	xe_assert(xe, q->guc->wqi_tail <= WQ_SIZE);

	xe_device_wmb(xe);

	map = xe_lrc_parallel_map(q->lrc);
	parallel_write(xe, map, wq_desc.tail, q->guc->wqi_tail);
}
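
/*
 * Example of what wq_item_append() emits for a width-2 queue (a trace of the
 * code above, values illustrative):
 *
 *	wqi[0] = WQ_TYPE_MULTI_LRC | len_dw		// header
 *	wqi[1] = xe_lrc_descriptor(q->lrc)		// parent LRC
 *	wqi[2] = guc_id | parent ring tail (u64 units)
 *	wqi[3] = 0
 *	wqi[4] = child (lrc[1]) ring tail
 *
 * i.e. wqi_size = (2 + WQ_HEADER_SIZE - 1) * sizeof(u32) = 20 bytes and
 * len_dw = 20 / 4 - 1 = 4.
 */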

#define RESUME_PENDING	~0x0ull
static void submit_exec_queue(struct xe_exec_queue *q)
{
	struct xe_guc *guc = exec_queue_to_guc(q);
	struct xe_device *xe = guc_to_xe(guc);
	struct xe_lrc *lrc = q->lrc;
	u32 action[3];
	u32 g2h_len = 0;
	u32 num_g2h = 0;
	int len = 0;
	bool extra_submit = false;

	xe_assert(xe, exec_queue_registered(q));

	if (xe_exec_queue_is_parallel(q))
		wq_item_append(q);
	else
		xe_lrc_write_ctx_reg(lrc, CTX_RING_TAIL, lrc->ring.tail);

	if (exec_queue_suspended(q) && !xe_exec_queue_is_parallel(q))
		return;

	if (!exec_queue_enabled(q) && !exec_queue_suspended(q)) {
		action[len++] = XE_GUC_ACTION_SCHED_CONTEXT_MODE_SET;
		action[len++] = q->guc->id;
		action[len++] = GUC_CONTEXT_ENABLE;
		g2h_len = G2H_LEN_DW_SCHED_CONTEXT_MODE_SET;
		num_g2h = 1;
		if (xe_exec_queue_is_parallel(q))
			extra_submit = true;

		q->guc->resume_time = RESUME_PENDING;
		set_exec_queue_pending_enable(q);
		set_exec_queue_enabled(q);
		trace_xe_exec_queue_scheduling_enable(q);
	} else {
		action[len++] = XE_GUC_ACTION_SCHED_CONTEXT;
		action[len++] = q->guc->id;
		trace_xe_exec_queue_submit(q);
	}

	xe_guc_ct_send(&guc->ct, action, len, g2h_len, num_g2h);

	if (extra_submit) {
		len = 0;
		action[len++] = XE_GUC_ACTION_SCHED_CONTEXT;
		action[len++] = q->guc->id;
		trace_xe_exec_queue_submit(q);

		xe_guc_ct_send(&guc->ct, action, len, 0, 0);
	}
}

static struct dma_fence *
guc_exec_queue_run_job(struct drm_sched_job *drm_job)
{
	struct xe_sched_job *job = to_xe_sched_job(drm_job);
	struct xe_exec_queue *q = job->q;
	struct xe_guc *guc = exec_queue_to_guc(q);
	struct xe_device *xe = guc_to_xe(guc);
	bool lr = xe_exec_queue_is_lr(q);

	xe_assert(xe, !(exec_queue_destroyed(q) || exec_queue_pending_disable(q)) ||
		  exec_queue_banned(q) || exec_queue_suspended(q));

	trace_xe_sched_job_run(job);

	if (!exec_queue_killed_or_banned(q) && !xe_sched_job_is_error(job)) {
		if (!exec_queue_registered(q))
			register_engine(q);
		if (!lr)	/* LR jobs are emitted in the exec IOCTL */
			q->ring_ops->emit_job(job);
		submit_exec_queue(q);
	}

	if (lr) {
		xe_sched_job_set_error(job, -EOPNOTSUPP);
		return NULL;
	} else if (test_and_set_bit(JOB_FLAG_SUBMIT, &job->fence->flags)) {
		return job->fence;
	} else {
		return dma_fence_get(job->fence);
	}
}

static void guc_exec_queue_free_job(struct drm_sched_job *drm_job)
{
	struct xe_sched_job *job = to_xe_sched_job(drm_job);

	trace_xe_sched_job_free(job);
	xe_sched_job_put(job);
}

static int guc_read_stopped(struct xe_guc *guc)
{
	return atomic_read(&guc->submission_state.stopped);
}

#define MAKE_SCHED_CONTEXT_ACTION(q, enable_disable)			\
	u32 action[] = {						\
		XE_GUC_ACTION_SCHED_CONTEXT_MODE_SET,			\
		q->guc->id,						\
		GUC_CONTEXT_##enable_disable,				\
	}
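
/*
 * For example, MAKE_SCHED_CONTEXT_ACTION(q, ENABLE) declares:
 *
 *	u32 action[] = {
 *		XE_GUC_ACTION_SCHED_CONTEXT_MODE_SET,
 *		q->guc->id,
 *		GUC_CONTEXT_ENABLE,
 *	};
 */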

static void disable_scheduling_deregister(struct xe_guc *guc,
					  struct xe_exec_queue *q)
{
	MAKE_SCHED_CONTEXT_ACTION(q, DISABLE);
	struct xe_device *xe = guc_to_xe(guc);
	int ret;

	set_min_preemption_timeout(guc, q);
	smp_rmb();
	ret = wait_event_timeout(guc->ct.wq, !exec_queue_pending_enable(q) ||
				 guc_read_stopped(guc), HZ * 5);
	if (!ret) {
		struct xe_gpu_scheduler *sched = &q->guc->sched;

		drm_warn(&xe->drm, "Pending enable failed to respond");
		xe_sched_submission_start(sched);
		xe_gt_reset_async(q->gt);
		xe_sched_tdr_queue_imm(sched);
		return;
	}

	clear_exec_queue_enabled(q);
	set_exec_queue_pending_disable(q);
	set_exec_queue_destroyed(q);
	trace_xe_exec_queue_scheduling_disable(q);

	/*
	 * Reserve space for both G2H here as the 2nd G2H is sent from a G2H
	 * handler and we are not allowed to reserve G2H space in handlers.
	 */
	xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action),
		       G2H_LEN_DW_SCHED_CONTEXT_MODE_SET +
		       G2H_LEN_DW_DEREGISTER_CONTEXT, 2);
}

static void guc_exec_queue_print(struct xe_exec_queue *q, struct drm_printer *p);

#if IS_ENABLED(CONFIG_DRM_XE_SIMPLE_ERROR_CAPTURE)
static void simple_error_capture(struct xe_exec_queue *q)
{
	struct xe_guc *guc = exec_queue_to_guc(q);
	struct drm_printer p = drm_err_printer("");
	struct xe_hw_engine *hwe;
	enum xe_hw_engine_id id;
	u32 adj_logical_mask = q->logical_mask;
	u32 width_mask = (0x1 << q->width) - 1;
	int i;
	bool cookie;

	if (q->vm && !q->vm->error_capture.capture_once) {
		q->vm->error_capture.capture_once = true;
		cookie = dma_fence_begin_signalling();
		for (i = 0; q->width > 1 && i < XE_HW_ENGINE_MAX_INSTANCE;) {
			if (adj_logical_mask & BIT(i)) {
				adj_logical_mask |= width_mask << i;
				i += q->width;
			} else {
				++i;
			}
		}

		xe_force_wake_get(gt_to_fw(guc_to_gt(guc)), XE_FORCEWAKE_ALL);
		xe_guc_ct_print(&guc->ct, &p, true);
		guc_exec_queue_print(q, &p);
		for_each_hw_engine(hwe, guc_to_gt(guc), id) {
			if (hwe->class != q->hwe->class ||
			    !(BIT(hwe->logical_instance) & adj_logical_mask))
				continue;
			xe_hw_engine_print(hwe, &p);
		}
		xe_analyze_vm(&p, q->vm, q->gt->info.id);
		xe_force_wake_put(gt_to_fw(guc_to_gt(guc)), XE_FORCEWAKE_ALL);
		dma_fence_end_signalling(cookie);
	}
}
#else
static void simple_error_capture(struct xe_exec_queue *q)
{
}
#endif

static void xe_guc_exec_queue_trigger_cleanup(struct xe_exec_queue *q)
{
	struct xe_guc *guc = exec_queue_to_guc(q);
	struct xe_device *xe = guc_to_xe(guc);

	/* Wake up the xe_wait_user_fence ioctl if the exec queue is reset */
	wake_up_all(&xe->ufence_wq);

	if (xe_exec_queue_is_lr(q))
		queue_work(guc_to_gt(guc)->ordered_wq, &q->guc->lr_tdr);
	else
		xe_sched_tdr_queue_imm(&q->guc->sched);
}

static void xe_guc_exec_queue_lr_cleanup(struct work_struct *w)
{
	struct xe_guc_exec_queue *ge =
		container_of(w, struct xe_guc_exec_queue, lr_tdr);
	struct xe_exec_queue *q = ge->q;
	struct xe_guc *guc = exec_queue_to_guc(q);
	struct xe_device *xe = guc_to_xe(guc);
	struct xe_gpu_scheduler *sched = &ge->sched;

	xe_assert(xe, xe_exec_queue_is_lr(q));
	trace_xe_exec_queue_lr_cleanup(q);

	/* Kill the run_job / process_msg entry points */
	xe_sched_submission_stop(sched);

	/*
	 * Engine state now mostly stable, disable scheduling / deregister if
	 * needed. This cleanup routine might be called multiple times, where
	 * the actual async engine deregister drops the final engine ref.
	 * Calling disable_scheduling_deregister will mark the engine as
	 * destroyed and fire off the CT requests to disable scheduling /
	 * deregister, which we only want to do once. We also don't want to mark
	 * the engine as pending_disable again as this may race with the
	 * xe_guc_deregister_done_handler() which treats it as an unexpected
	 * state.
	 */
	if (exec_queue_registered(q) && !exec_queue_destroyed(q)) {
		struct xe_guc *guc = exec_queue_to_guc(q);
		int ret;

		set_exec_queue_banned(q);
		disable_scheduling_deregister(guc, q);

		/*
		 * Must wait for scheduling to be disabled before signalling
		 * any fences; if the GT is broken the GT reset code should
		 * signal us.
		 */
		ret = wait_event_timeout(guc->ct.wq,
					 !exec_queue_pending_disable(q) ||
					 guc_read_stopped(guc), HZ * 5);
		if (!ret) {
			drm_warn(&xe->drm, "Schedule disable failed to respond");
			xe_sched_submission_start(sched);
			xe_gt_reset_async(q->gt);
			return;
		}
	}

	xe_sched_submission_start(sched);
}

static enum drm_gpu_sched_stat
guc_exec_queue_timedout_job(struct drm_sched_job *drm_job)
{
	struct xe_sched_job *job = to_xe_sched_job(drm_job);
	struct xe_sched_job *tmp_job;
	struct xe_exec_queue *q = job->q;
	struct xe_gpu_scheduler *sched = &q->guc->sched;
	struct xe_device *xe = guc_to_xe(exec_queue_to_guc(q));
	int err = -ETIME;
	int i = 0;

	if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &job->fence->flags)) {
		xe_assert(xe, !(q->flags & EXEC_QUEUE_FLAG_KERNEL));
		xe_assert(xe, !(q->flags & EXEC_QUEUE_FLAG_VM && !exec_queue_killed(q)));

		drm_notice(&xe->drm, "Timedout job: seqno=%u, guc_id=%d, flags=0x%lx",
			   xe_sched_job_seqno(job), q->guc->id, q->flags);
		simple_error_capture(q);
		xe_devcoredump(q);
	} else {
		drm_dbg(&xe->drm, "Timedout signaled job: seqno=%u, guc_id=%d, flags=0x%lx",
			xe_sched_job_seqno(job), q->guc->id, q->flags);
	}
	trace_xe_sched_job_timedout(job);

	/* Kill the run_job entry point */
	xe_sched_submission_stop(sched);

	/*
	 * Kernel jobs should never fail, nor should VM jobs; if they do,
	 * something has gone wrong and the GT needs a reset.
	 */
	if (q->flags & EXEC_QUEUE_FLAG_KERNEL ||
	    (q->flags & EXEC_QUEUE_FLAG_VM && !exec_queue_killed(q))) {
		if (!xe_sched_invalidate_job(job, 2)) {
			xe_sched_add_pending_job(sched, job);
			xe_sched_submission_start(sched);
			xe_gt_reset_async(q->gt);
			goto out;
		}
	}

	/* Engine state now stable, disable scheduling if needed */
	if (exec_queue_registered(q)) {
		struct xe_guc *guc = exec_queue_to_guc(q);
		int ret;

		if (exec_queue_reset(q))
			err = -EIO;
		set_exec_queue_banned(q);
		if (!exec_queue_destroyed(q)) {
			xe_exec_queue_get(q);
			disable_scheduling_deregister(guc, q);
		}

		/*
		 * Must wait for scheduling to be disabled before signalling
		 * any fences; if the GT is broken the GT reset code should
		 * signal us.
		 *
		 * FIXME: Tests can generate a ton of 0x6000 (IOMMU CAT fault
		 * error) messages which can cause the schedule disable to get
		 * lost. If this occurs, trigger a GT reset to recover.
		 */
		smp_rmb();
		ret = wait_event_timeout(guc->ct.wq,
					 !exec_queue_pending_disable(q) ||
					 guc_read_stopped(guc), HZ * 5);
		if (!ret || guc_read_stopped(guc)) {
			drm_warn(&xe->drm, "Schedule disable failed to respond");
			xe_sched_add_pending_job(sched, job);
			xe_sched_submission_start(sched);
			xe_gt_reset_async(q->gt);
			xe_sched_tdr_queue_imm(sched);
			goto out;
		}
	}

	/* Stop fence signaling */
	xe_hw_fence_irq_stop(q->fence_irq);

	/*
	 * Fence state now stable, stop / start scheduler which cleans up any
	 * fences that are complete
	 */
	xe_sched_add_pending_job(sched, job);
	xe_sched_submission_start(sched);
	xe_guc_exec_queue_trigger_cleanup(q);

	/* Mark all outstanding jobs as bad, thus completing them */
	spin_lock(&sched->base.job_list_lock);
	list_for_each_entry(tmp_job, &sched->base.pending_list, drm.list)
		xe_sched_job_set_error(tmp_job, !i++ ? err : -ECANCELED);
	spin_unlock(&sched->base.job_list_lock);

	/* Start fence signaling */
	xe_hw_fence_irq_start(q->fence_irq);

out:
	return DRM_GPU_SCHED_STAT_NOMINAL;
}

static void __guc_exec_queue_fini_async(struct work_struct *w)
{
	struct xe_guc_exec_queue *ge =
		container_of(w, struct xe_guc_exec_queue, fini_async);
	struct xe_exec_queue *q = ge->q;
	struct xe_guc *guc = exec_queue_to_guc(q);

	trace_xe_exec_queue_destroy(q);

	if (xe_exec_queue_is_lr(q))
		cancel_work_sync(&ge->lr_tdr);
	release_guc_id(guc, q);
	xe_sched_entity_fini(&ge->entity);
	xe_sched_fini(&ge->sched);

	kfree(ge);
	xe_exec_queue_fini(q);
}

static void guc_exec_queue_fini_async(struct xe_exec_queue *q)
{
	INIT_WORK(&q->guc->fini_async, __guc_exec_queue_fini_async);

	/* We must block on kernel engines so slabs are empty on driver unload */
	if (q->flags & EXEC_QUEUE_FLAG_PERMANENT)
		__guc_exec_queue_fini_async(&q->guc->fini_async);
	else
		queue_work(system_wq, &q->guc->fini_async);
}

static void __guc_exec_queue_fini(struct xe_guc *guc, struct xe_exec_queue *q)
{
	/*
	 * Might be done from within the GPU scheduler, need to do async as we
	 * fini the scheduler when the engine is fini'd, the scheduler can't
	 * complete fini within itself (circular dependency). Async resolves
	 * this and we don't really care when everything is fini'd, just that it
	 * is.
	 */
	guc_exec_queue_fini_async(q);
}

static void __guc_exec_queue_process_msg_cleanup(struct xe_sched_msg *msg)
{
	struct xe_exec_queue *q = msg->private_data;
	struct xe_guc *guc = exec_queue_to_guc(q);
	struct xe_device *xe = guc_to_xe(guc);

	xe_assert(xe, !(q->flags & EXEC_QUEUE_FLAG_PERMANENT));
	trace_xe_exec_queue_cleanup_entity(q);

	if (exec_queue_registered(q))
		disable_scheduling_deregister(guc, q);
	else
		__guc_exec_queue_fini(guc, q);
}

static bool guc_exec_queue_allowed_to_change_state(struct xe_exec_queue *q)
{
	return !exec_queue_killed_or_banned(q) && exec_queue_registered(q);
}

static void __guc_exec_queue_process_msg_set_sched_props(struct xe_sched_msg *msg)
{
	struct xe_exec_queue *q = msg->private_data;
	struct xe_guc *guc = exec_queue_to_guc(q);

	if (guc_exec_queue_allowed_to_change_state(q))
		init_policies(guc, q);
	kfree(msg);
}

static void suspend_fence_signal(struct xe_exec_queue *q)
{
	struct xe_guc *guc = exec_queue_to_guc(q);
	struct xe_device *xe = guc_to_xe(guc);

	xe_assert(xe, exec_queue_suspended(q) || exec_queue_killed(q) ||
		  guc_read_stopped(guc));
	xe_assert(xe, q->guc->suspend_pending);

	q->guc->suspend_pending = false;
	smp_wmb();
	wake_up(&q->guc->suspend_wait);
}

static void __guc_exec_queue_process_msg_suspend(struct xe_sched_msg *msg)
{
	struct xe_exec_queue *q = msg->private_data;
	struct xe_guc *guc = exec_queue_to_guc(q);

	if (guc_exec_queue_allowed_to_change_state(q) && !exec_queue_suspended(q) &&
	    exec_queue_enabled(q)) {
		wait_event(guc->ct.wq, q->guc->resume_time != RESUME_PENDING ||
			   guc_read_stopped(guc));

		if (!guc_read_stopped(guc)) {
			MAKE_SCHED_CONTEXT_ACTION(q, DISABLE);
			s64 since_resume_ms =
				ktime_ms_delta(ktime_get(),
					       q->guc->resume_time);
			s64 wait_ms = q->vm->preempt.min_run_period_ms -
				since_resume_ms;

			if (wait_ms > 0 && q->guc->resume_time)
				msleep(wait_ms);

			set_exec_queue_suspended(q);
			clear_exec_queue_enabled(q);
			set_exec_queue_pending_disable(q);
			trace_xe_exec_queue_scheduling_disable(q);

			xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action),
				       G2H_LEN_DW_SCHED_CONTEXT_MODE_SET, 1);
		}
	} else if (q->guc->suspend_pending) {
		set_exec_queue_suspended(q);
		suspend_fence_signal(q);
	}
}

static void __guc_exec_queue_process_msg_resume(struct xe_sched_msg *msg)
{
	struct xe_exec_queue *q = msg->private_data;
	struct xe_guc *guc = exec_queue_to_guc(q);

	if (guc_exec_queue_allowed_to_change_state(q)) {
		MAKE_SCHED_CONTEXT_ACTION(q, ENABLE);

		q->guc->resume_time = RESUME_PENDING;
		clear_exec_queue_suspended(q);
		set_exec_queue_pending_enable(q);
		set_exec_queue_enabled(q);
		trace_xe_exec_queue_scheduling_enable(q);

		xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action),
			       G2H_LEN_DW_SCHED_CONTEXT_MODE_SET, 1);
	} else {
		clear_exec_queue_suspended(q);
	}
}

#define CLEANUP		1	/* Non-zero values to catch uninitialized msg */
#define SET_SCHED_PROPS	2
#define SUSPEND		3
#define RESUME		4
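
/*
 * Sketch of the message flow for a suspend, as implemented below:
 * guc_exec_queue_suspend() queues the static SUSPEND message via
 * guc_exec_queue_add_msg(), the scheduler worker dispatches it through
 * guc_exec_queue_process_msg(), which lands in
 * __guc_exec_queue_process_msg_suspend() above.
 */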

static void guc_exec_queue_process_msg(struct xe_sched_msg *msg)
{
	trace_xe_sched_msg_recv(msg);

	switch (msg->opcode) {
	case CLEANUP:
		__guc_exec_queue_process_msg_cleanup(msg);
		break;
	case SET_SCHED_PROPS:
		__guc_exec_queue_process_msg_set_sched_props(msg);
		break;
	case SUSPEND:
		__guc_exec_queue_process_msg_suspend(msg);
		break;
	case RESUME:
		__guc_exec_queue_process_msg_resume(msg);
		break;
	default:
		XE_WARN_ON("Unknown message type");
	}
}

static const struct drm_sched_backend_ops drm_sched_ops = {
	.run_job = guc_exec_queue_run_job,
	.free_job = guc_exec_queue_free_job,
	.timedout_job = guc_exec_queue_timedout_job,
};

static const struct xe_sched_backend_ops xe_sched_ops = {
	.process_msg = guc_exec_queue_process_msg,
};

static int guc_exec_queue_init(struct xe_exec_queue *q)
{
	struct xe_gpu_scheduler *sched;
	struct xe_guc *guc = exec_queue_to_guc(q);
	struct xe_device *xe = guc_to_xe(guc);
	struct xe_guc_exec_queue *ge;
	long timeout;
	int err;

	xe_assert(xe, xe_device_uc_enabled(guc_to_xe(guc)));

	ge = kzalloc(sizeof(*ge), GFP_KERNEL);
	if (!ge)
		return -ENOMEM;

	q->guc = ge;
	ge->q = q;
	init_waitqueue_head(&ge->suspend_wait);

	timeout = (q->vm && xe_vm_in_lr_mode(q->vm)) ? MAX_SCHEDULE_TIMEOUT :
		  q->hwe->eclass->sched_props.job_timeout_ms;
	err = xe_sched_init(&ge->sched, &drm_sched_ops, &xe_sched_ops,
			    get_submit_wq(guc),
			    q->lrc[0].ring.size / MAX_JOB_SIZE_BYTES, 64,
			    timeout, guc_to_gt(guc)->ordered_wq, NULL,
			    q->name, gt_to_xe(q->gt)->drm.dev);
	if (err)
		goto err_free;

	sched = &ge->sched;
	err = xe_sched_entity_init(&ge->entity, sched);
	if (err)
		goto err_sched;

	if (xe_exec_queue_is_lr(q))
		INIT_WORK(&q->guc->lr_tdr, xe_guc_exec_queue_lr_cleanup);

	mutex_lock(&guc->submission_state.lock);

	err = alloc_guc_id(guc, q);
	if (err)
		goto err_entity;

	q->entity = &ge->entity;

	if (guc_read_stopped(guc))
		xe_sched_stop(sched);

	mutex_unlock(&guc->submission_state.lock);

	xe_exec_queue_assign_name(q, q->guc->id);

	trace_xe_exec_queue_create(q);

	return 0;

err_entity:
	mutex_unlock(&guc->submission_state.lock);
	xe_sched_entity_fini(&ge->entity);
err_sched:
	xe_sched_fini(&ge->sched);
err_free:
	kfree(ge);

	return err;
}

static void guc_exec_queue_kill(struct xe_exec_queue *q)
{
	trace_xe_exec_queue_kill(q);
	set_exec_queue_killed(q);
	xe_guc_exec_queue_trigger_cleanup(q);
}

static void guc_exec_queue_add_msg(struct xe_exec_queue *q, struct xe_sched_msg *msg,
				   u32 opcode)
{
	INIT_LIST_HEAD(&msg->link);
	msg->opcode = opcode;
	msg->private_data = q;

	trace_xe_sched_msg_add(msg);
	xe_sched_add_msg(&q->guc->sched, msg);
}

#define STATIC_MSG_CLEANUP	0
#define STATIC_MSG_SUSPEND	1
#define STATIC_MSG_RESUME	2
static void guc_exec_queue_fini(struct xe_exec_queue *q)
{
	struct xe_sched_msg *msg = q->guc->static_msgs + STATIC_MSG_CLEANUP;

	if (!(q->flags & EXEC_QUEUE_FLAG_PERMANENT))
		guc_exec_queue_add_msg(q, msg, CLEANUP);
	else
		__guc_exec_queue_fini(exec_queue_to_guc(q), q);
}

static int guc_exec_queue_set_priority(struct xe_exec_queue *q,
				       enum xe_exec_queue_priority priority)
{
	struct xe_sched_msg *msg;

	if (q->sched_props.priority == priority || exec_queue_killed_or_banned(q))
		return 0;

	msg = kmalloc(sizeof(*msg), GFP_KERNEL);
	if (!msg)
		return -ENOMEM;

	q->sched_props.priority = priority;
	guc_exec_queue_add_msg(q, msg, SET_SCHED_PROPS);

	return 0;
}

static int guc_exec_queue_set_timeslice(struct xe_exec_queue *q, u32 timeslice_us)
{
	struct xe_sched_msg *msg;

	if (q->sched_props.timeslice_us == timeslice_us ||
	    exec_queue_killed_or_banned(q))
		return 0;

	msg = kmalloc(sizeof(*msg), GFP_KERNEL);
	if (!msg)
		return -ENOMEM;

	q->sched_props.timeslice_us = timeslice_us;
	guc_exec_queue_add_msg(q, msg, SET_SCHED_PROPS);

	return 0;
}

static int guc_exec_queue_set_preempt_timeout(struct xe_exec_queue *q,
					      u32 preempt_timeout_us)
{
	struct xe_sched_msg *msg;

	if (q->sched_props.preempt_timeout_us == preempt_timeout_us ||
	    exec_queue_killed_or_banned(q))
		return 0;

	msg = kmalloc(sizeof(*msg), GFP_KERNEL);
	if (!msg)
		return -ENOMEM;

	q->sched_props.preempt_timeout_us = preempt_timeout_us;
	guc_exec_queue_add_msg(q, msg, SET_SCHED_PROPS);

	return 0;
}

static int guc_exec_queue_set_job_timeout(struct xe_exec_queue *q, u32 job_timeout_ms)
{
	struct xe_gpu_scheduler *sched = &q->guc->sched;
	struct xe_guc *guc = exec_queue_to_guc(q);
	struct xe_device *xe = guc_to_xe(guc);

	xe_assert(xe, !exec_queue_registered(q));
	xe_assert(xe, !exec_queue_banned(q));
	xe_assert(xe, !exec_queue_killed(q));

	sched->base.timeout = job_timeout_ms;

	return 0;
}

static int guc_exec_queue_suspend(struct xe_exec_queue *q)
{
	struct xe_sched_msg *msg = q->guc->static_msgs + STATIC_MSG_SUSPEND;

	if (exec_queue_killed_or_banned(q) || q->guc->suspend_pending)
		return -EINVAL;

	q->guc->suspend_pending = true;
	guc_exec_queue_add_msg(q, msg, SUSPEND);

	return 0;
}

static void guc_exec_queue_suspend_wait(struct xe_exec_queue *q)
{
	struct xe_guc *guc = exec_queue_to_guc(q);

	wait_event(q->guc->suspend_wait, !q->guc->suspend_pending ||
		   guc_read_stopped(guc));
}

static void guc_exec_queue_resume(struct xe_exec_queue *q)
{
	struct xe_sched_msg *msg = q->guc->static_msgs + STATIC_MSG_RESUME;
	struct xe_guc *guc = exec_queue_to_guc(q);
	struct xe_device *xe = guc_to_xe(guc);

	xe_assert(xe, !q->guc->suspend_pending);

	guc_exec_queue_add_msg(q, msg, RESUME);
}

e670f0b4 BK |
1398 | static bool guc_exec_queue_reset_status(struct xe_exec_queue *q) |
1399 | { | |
1400 | return exec_queue_reset(q); | |
1401 | } | |
1402 | ||
dd08ebf6 MB |
1403 | /* |
 1404 | * These functions are an abstraction layer which other parts of Xe can |
 1405 | * use to trap into the GuC backend. Aside from init, none of them should |
 1406 | * do much more than trap into the DRM scheduler, which synchronizes |
 1407 | * these operations. |
1408 | */ | |
9b9529ce FD |
1409 | static const struct xe_exec_queue_ops guc_exec_queue_ops = { |
1410 | .init = guc_exec_queue_init, | |
1411 | .kill = guc_exec_queue_kill, | |
1412 | .fini = guc_exec_queue_fini, | |
1413 | .set_priority = guc_exec_queue_set_priority, | |
1414 | .set_timeslice = guc_exec_queue_set_timeslice, | |
1415 | .set_preempt_timeout = guc_exec_queue_set_preempt_timeout, | |
1416 | .set_job_timeout = guc_exec_queue_set_job_timeout, | |
1417 | .suspend = guc_exec_queue_suspend, | |
1418 | .suspend_wait = guc_exec_queue_suspend_wait, | |
1419 | .resume = guc_exec_queue_resume, | |
e670f0b4 | 1420 | .reset_status = guc_exec_queue_reset_status, |
dd08ebf6 MB |
1421 | }; |
1422 | ||
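Presumably q->ops is pointed at guc_exec_queue_ops when the queue is created, so generic exec queue code never calls this backend directly. Note that set_job_timeout asserts the queue is not yet registered, so it is only meaningful before first submission. An illustrative (non-driver) caller:

static int example_configure_new_queue(struct xe_exec_queue *q)
{
	int err;

	/* Must happen before the queue is registered with the GuC */
	err = q->ops->set_job_timeout(q, 5000);	/* 5 s */
	if (err)
		return err;

	return q->ops->set_timeslice(q, 1000);	/* 1 ms */
}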
9b9529ce | 1423 | static void guc_exec_queue_stop(struct xe_guc *guc, struct xe_exec_queue *q) |
dd08ebf6 | 1424 | { |
9b9529ce | 1425 | struct xe_gpu_scheduler *sched = &q->guc->sched; |
dd08ebf6 MB |
1426 | |
1427 | /* Stop scheduling + flush any DRM scheduler operations */ | |
1428 | xe_sched_submission_stop(sched); | |
1429 | ||
1430 | /* Clean up lost G2H + reset engine state */ | |
9b9529ce FD |
1431 | if (exec_queue_registered(q)) { |
1432 | if ((exec_queue_banned(q) && exec_queue_destroyed(q)) || | |
1433 | xe_exec_queue_is_lr(q)) | |
1434 | xe_exec_queue_put(q); | |
1435 | else if (exec_queue_destroyed(q)) | |
1436 | __guc_exec_queue_fini(guc, q); | |
dd08ebf6 | 1437 | } |
9b9529ce FD |
1438 | if (q->guc->suspend_pending) { |
1439 | set_exec_queue_suspended(q); | |
1440 | suspend_fence_signal(q); | |
dd08ebf6 | 1441 | } |
9b9529ce FD |
1442 | atomic_and(EXEC_QUEUE_STATE_DESTROYED | ENGINE_STATE_SUSPENDED, |
1443 | &q->guc->state); | |
1444 | q->guc->resume_time = 0; | |
1445 | trace_xe_exec_queue_stop(q); | |
dd08ebf6 MB |
1446 | |
1447 | /* | |
 1448 | * Ban any engine (aside from kernel engines and engines used for VM |
 1449 | * ops) that has a started but not completed job, or whose job has gone |
 1450 | * through a GT reset more than twice. |
1451 | */ | |
9b9529ce | 1452 | if (!(q->flags & (EXEC_QUEUE_FLAG_KERNEL | EXEC_QUEUE_FLAG_VM))) { |
dd08ebf6 MB |
1453 | struct xe_sched_job *job = xe_sched_first_pending_job(sched); |
1454 | ||
1455 | if (job) { | |
1456 | if ((xe_sched_job_started(job) && | |
1457 | !xe_sched_job_completed(job)) || | |
1458 | xe_sched_invalidate_job(job, 2)) { | |
1459 | trace_xe_sched_job_ban(job); | |
9b9529ce FD |
1460 | xe_sched_tdr_queue_imm(&q->guc->sched); |
1461 | set_exec_queue_banned(q); | |
dd08ebf6 MB |
1462 | } |
1463 | } | |
1464 | } | |
1465 | } | |
1466 | ||
1467 | int xe_guc_submit_reset_prepare(struct xe_guc *guc) | |
1468 | { | |
1469 | int ret; | |
1470 | ||
1471 | /* | |
1472 | * Using an atomic here rather than submission_state.lock as this | |
1473 | * function can be called while holding the CT lock (engine reset | |
1474 | * failure). submission_state.lock needs the CT lock to resubmit jobs. | |
 1475 | * An atomic is not ideal, but it works to protect against a concurrent |
 1476 | * reset and to release any TDRs waiting on guc->submission_state.stopped. |
1477 | */ | |
1478 | ret = atomic_fetch_or(1, &guc->submission_state.stopped); | |
1479 | smp_wmb(); | |
1480 | wake_up_all(&guc->ct.wq); | |
1481 | ||
1482 | return ret; | |
1483 | } | |
1484 | ||
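guc_read_stopped() is defined earlier in this file and not shown in this excerpt; a sketch of the read side that pairs with the atomic_fetch_or() above (an assumption about its shape, not verbatim driver code):

static int guc_read_stopped(struct xe_guc *guc)
{
	/* Pairs with atomic_fetch_or(1, ...) in xe_guc_submit_reset_prepare() */
	return atomic_read(&guc->submission_state.stopped);
}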
1485 | void xe_guc_submit_reset_wait(struct xe_guc *guc) | |
1486 | { | |
1487 | wait_event(guc->ct.wq, !guc_read_stopped(guc)); | |
1488 | } | |
1489 | ||
1490 | int xe_guc_submit_stop(struct xe_guc *guc) | |
1491 | { | |
9b9529ce | 1492 | struct xe_exec_queue *q; |
dd08ebf6 | 1493 | unsigned long index; |
c73acc1e | 1494 | struct xe_device *xe = guc_to_xe(guc); |
dd08ebf6 | 1495 | |
c73acc1e | 1496 | xe_assert(xe, guc_read_stopped(guc) == 1); |
dd08ebf6 MB |
1497 | |
1498 | mutex_lock(&guc->submission_state.lock); | |
1499 | ||
9b9529ce FD |
1500 | xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) |
1501 | guc_exec_queue_stop(guc, q); | |
dd08ebf6 MB |
1502 | |
1503 | mutex_unlock(&guc->submission_state.lock); | |
1504 | ||
1505 | /* | |
1506 | * No one can enter the backend at this point, aside from new engine | |
1507 | * creation which is protected by guc->submission_state.lock. | |
1508 | */ | |
1509 | ||
1510 | return 0; | |
1511 | } | |
1512 | ||
9b9529ce | 1513 | static void guc_exec_queue_start(struct xe_exec_queue *q) |
dd08ebf6 | 1514 | { |
9b9529ce | 1515 | struct xe_gpu_scheduler *sched = &q->guc->sched; |
dd08ebf6 | 1516 | |
9b9529ce | 1517 | if (!exec_queue_killed_or_banned(q)) { |
dd08ebf6 MB |
1518 | int i; |
1519 | ||
9b9529ce FD |
1520 | trace_xe_exec_queue_resubmit(q); |
1521 | for (i = 0; i < q->width; ++i) | |
1522 | xe_lrc_set_ring_head(q->lrc + i, q->lrc[i].ring.tail); | |
dd08ebf6 MB |
1523 | xe_sched_resubmit_jobs(sched); |
1524 | } | |
1525 | ||
1526 | xe_sched_submission_start(sched); | |
1527 | } | |
1528 | ||
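guc_exec_queue_start() forces each LRC ring head to the cached software tail, so the ring reads as empty and xe_sched_resubmit_jobs() re-emits the pending jobs' commands from scratch. Using the circ_buf helpers this file already includes, the invariant can be sketched as (example_ring_pending is illustrative, not a driver function):

/* Bytes the GPU has not yet consumed; zero once head is forced to tail */
static inline u32 example_ring_pending(u32 head, u32 tail, u32 size)
{
	return CIRC_CNT(tail, head, size);	/* size must be a power of 2 */
}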
1529 | int xe_guc_submit_start(struct xe_guc *guc) | |
1530 | { | |
9b9529ce | 1531 | struct xe_exec_queue *q; |
dd08ebf6 | 1532 | unsigned long index; |
c73acc1e | 1533 | struct xe_device *xe = guc_to_xe(guc); |
dd08ebf6 | 1534 | |
c73acc1e | 1535 | xe_assert(xe, guc_read_stopped(guc) == 1); |
dd08ebf6 MB |
1536 | |
1537 | mutex_lock(&guc->submission_state.lock); | |
1538 | atomic_dec(&guc->submission_state.stopped); | |
9b9529ce FD |
1539 | xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) |
1540 | guc_exec_queue_start(q); | |
dd08ebf6 MB |
1541 | mutex_unlock(&guc->submission_state.lock); |
1542 | ||
1543 | wake_up_all(&guc->ct.wq); | |
1544 | ||
1545 | return 0; | |
1546 | } | |
1547 | ||
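Taken together, the three exported functions above suggest the GT reset ordering below. This is a sketch assumed from their contracts (reset_prepare returns the previous stopped value, and xe_guc_submit_stop() asserts stopped == 1); the real orchestration lives in the GT reset code, not here:

static void example_gt_reset_flow(struct xe_guc *guc)
{
	/* Nonzero return means a reset was already in progress */
	if (!xe_guc_submit_reset_prepare(guc))
		xe_guc_submit_stop(guc);	/* park schedulers, clean G2H */

	/* ... reset the GuC / GT hardware here ... */

	xe_guc_submit_start(guc);		/* resubmit jobs, restart */
}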
9b9529ce FD |
1548 | static struct xe_exec_queue * |
1549 | g2h_exec_queue_lookup(struct xe_guc *guc, u32 guc_id) | |
dd08ebf6 MB |
1550 | { |
1551 | struct xe_device *xe = guc_to_xe(guc); | |
9b9529ce | 1552 | struct xe_exec_queue *q; |
dd08ebf6 MB |
1553 | |
1554 | if (unlikely(guc_id >= GUC_ID_MAX)) { | |
1555 | drm_err(&xe->drm, "Invalid guc_id %u", guc_id); | |
1556 | return NULL; | |
1557 | } | |
1558 | ||
9b9529ce FD |
1559 | q = xa_load(&guc->submission_state.exec_queue_lookup, guc_id); |
1560 | if (unlikely(!q)) { | |
dd08ebf6 MB |
 1561 | drm_err(&xe->drm, "No engine present for guc_id %u", guc_id); |
1562 | return NULL; | |
1563 | } | |
1564 | ||
cb90d469 DCS |
1565 | xe_assert(xe, guc_id >= q->guc->id); |
1566 | xe_assert(xe, guc_id < (q->guc->id + q->width)); | |
dd08ebf6 | 1567 | |
9b9529ce | 1568 | return q; |
dd08ebf6 MB |
1569 | } |
1570 | ||
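The two asserts encode that a queue of width N owns the contiguous guc_id range [q->guc->id, q->guc->id + q->width). A hypothetical helper (not in the driver) mapping an incoming id back to its LRC:

static struct xe_lrc *example_guc_id_to_lrc(struct xe_exec_queue *q,
					    u32 guc_id)
{
	/* In range per the asserts in g2h_exec_queue_lookup() */
	return q->lrc + (guc_id - q->guc->id);
}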
9b9529ce | 1571 | static void deregister_exec_queue(struct xe_guc *guc, struct xe_exec_queue *q) |
dd08ebf6 MB |
1572 | { |
1573 | u32 action[] = { | |
1574 | XE_GUC_ACTION_DEREGISTER_CONTEXT, | |
9b9529ce | 1575 | q->guc->id, |
dd08ebf6 MB |
1576 | }; |
1577 | ||
9b9529ce | 1578 | trace_xe_exec_queue_deregister(q); |
dd08ebf6 MB |
1579 | |
1580 | xe_guc_ct_send_g2h_handler(&guc->ct, action, ARRAY_SIZE(action)); | |
1581 | } | |
1582 | ||
1583 | int xe_guc_sched_done_handler(struct xe_guc *guc, u32 *msg, u32 len) | |
1584 | { | |
1585 | struct xe_device *xe = guc_to_xe(guc); | |
9b9529ce | 1586 | struct xe_exec_queue *q; |
dd08ebf6 MB |
1587 | u32 guc_id = msg[0]; |
1588 | ||
1589 | if (unlikely(len < 2)) { | |
1590 | drm_err(&xe->drm, "Invalid length %u", len); | |
1591 | return -EPROTO; | |
1592 | } | |
1593 | ||
9b9529ce FD |
1594 | q = g2h_exec_queue_lookup(guc, guc_id); |
1595 | if (unlikely(!q)) | |
dd08ebf6 MB |
1596 | return -EPROTO; |
1597 | ||
9b9529ce FD |
1598 | if (unlikely(!exec_queue_pending_enable(q) && |
1599 | !exec_queue_pending_disable(q))) { | |
dd08ebf6 | 1600 | drm_err(&xe->drm, "Unexpected engine state 0x%04x", |
9b9529ce | 1601 | atomic_read(&q->guc->state)); |
dd08ebf6 MB |
1602 | return -EPROTO; |
1603 | } | |
1604 | ||
9b9529ce | 1605 | trace_xe_exec_queue_scheduling_done(q); |
dd08ebf6 | 1606 | |
9b9529ce FD |
1607 | if (exec_queue_pending_enable(q)) { |
1608 | q->guc->resume_time = ktime_get(); | |
1609 | clear_exec_queue_pending_enable(q); | |
dd08ebf6 MB |
1610 | smp_wmb(); |
1611 | wake_up_all(&guc->ct.wq); | |
1612 | } else { | |
9b9529ce FD |
1613 | clear_exec_queue_pending_disable(q); |
1614 | if (q->guc->suspend_pending) { | |
1615 | suspend_fence_signal(q); | |
dd08ebf6 | 1616 | } else { |
9b9529ce | 1617 | if (exec_queue_banned(q)) { |
dd08ebf6 MB |
1618 | smp_wmb(); |
1619 | wake_up_all(&guc->ct.wq); | |
1620 | } | |
9b9529ce | 1621 | deregister_exec_queue(guc, q); |
dd08ebf6 MB |
1622 | } |
1623 | } | |
1624 | ||
1625 | return 0; | |
1626 | } | |
1627 | ||
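The pending-enable branch above clears the bit, does smp_wmb(), and wakes guc->ct.wq; somewhere earlier in this file a waiter presumably blocks until that happens. A sketch of that wait side (assumed shape, not verbatim):

static void example_wait_pending_enable(struct xe_guc *guc,
					struct xe_exec_queue *q)
{
	/* Woken by xe_guc_sched_done_handler() or by a GuC stop */
	wait_event(guc->ct.wq, !exec_queue_pending_enable(q) ||
		   guc_read_stopped(guc));
}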
1628 | int xe_guc_deregister_done_handler(struct xe_guc *guc, u32 *msg, u32 len) | |
1629 | { | |
1630 | struct xe_device *xe = guc_to_xe(guc); | |
9b9529ce | 1631 | struct xe_exec_queue *q; |
dd08ebf6 MB |
1632 | u32 guc_id = msg[0]; |
1633 | ||
1634 | if (unlikely(len < 1)) { | |
1635 | drm_err(&xe->drm, "Invalid length %u", len); | |
1636 | return -EPROTO; | |
1637 | } | |
1638 | ||
9b9529ce FD |
1639 | q = g2h_exec_queue_lookup(guc, guc_id); |
1640 | if (unlikely(!q)) | |
dd08ebf6 MB |
1641 | return -EPROTO; |
1642 | ||
9b9529ce FD |
1643 | if (!exec_queue_destroyed(q) || exec_queue_pending_disable(q) || |
1644 | exec_queue_pending_enable(q) || exec_queue_enabled(q)) { | |
dd08ebf6 | 1645 | drm_err(&xe->drm, "Unexpected engine state 0x%04x", |
9b9529ce | 1646 | atomic_read(&q->guc->state)); |
dd08ebf6 MB |
1647 | return -EPROTO; |
1648 | } | |
1649 | ||
9b9529ce | 1650 | trace_xe_exec_queue_deregister_done(q); |
dd08ebf6 | 1651 | |
9b9529ce | 1652 | clear_exec_queue_registered(q); |
8ae8a2e8 | 1653 | |
9b9529ce FD |
1654 | if (exec_queue_banned(q) || xe_exec_queue_is_lr(q)) |
1655 | xe_exec_queue_put(q); | |
dd08ebf6 | 1656 | else |
9b9529ce | 1657 | __guc_exec_queue_fini(guc, q); |
dd08ebf6 MB |
1658 | |
1659 | return 0; | |
1660 | } | |
1661 | ||
9b9529ce | 1662 | int xe_guc_exec_queue_reset_handler(struct xe_guc *guc, u32 *msg, u32 len) |
dd08ebf6 MB |
1663 | { |
1664 | struct xe_device *xe = guc_to_xe(guc); | |
9b9529ce | 1665 | struct xe_exec_queue *q; |
dd08ebf6 MB |
1666 | u32 guc_id = msg[0]; |
1667 | ||
1668 | if (unlikely(len < 1)) { | |
1669 | drm_err(&xe->drm, "Invalid length %u", len); | |
1670 | return -EPROTO; | |
1671 | } | |
1672 | ||
9b9529ce FD |
1673 | q = g2h_exec_queue_lookup(guc, guc_id); |
1674 | if (unlikely(!q)) | |
dd08ebf6 MB |
1675 | return -EPROTO; |
1676 | ||
1677 | drm_info(&xe->drm, "Engine reset: guc_id=%d", guc_id); | |
1678 | ||
1679 | /* FIXME: Do error capture, most likely async */ | |
1680 | ||
9b9529ce | 1681 | trace_xe_exec_queue_reset(q); |
dd08ebf6 MB |
1682 | |
1683 | /* | |
1684 | * A banned engine is a NOP at this point (came from | |
9b9529ce | 1685 | * guc_exec_queue_timedout_job). Otherwise, kick the DRM scheduler to |
dd08ebf6 | 1686 | * cancel jobs by setting the job's timeout to the minimum value, which |
9b9529ce | 1687 | * kicks guc_exec_queue_timedout_job. |
dd08ebf6 | 1688 | */ |
9b9529ce FD |
1689 | set_exec_queue_reset(q); |
1690 | if (!exec_queue_banned(q)) | |
1691 | xe_guc_exec_queue_trigger_cleanup(q); | |
dd08ebf6 MB |
1692 | |
1693 | return 0; | |
1694 | } | |
1695 | ||
9b9529ce FD |
1696 | int xe_guc_exec_queue_memory_cat_error_handler(struct xe_guc *guc, u32 *msg, |
1697 | u32 len) | |
dd08ebf6 MB |
1698 | { |
1699 | struct xe_device *xe = guc_to_xe(guc); | |
9b9529ce | 1700 | struct xe_exec_queue *q; |
dd08ebf6 MB |
1701 | u32 guc_id = msg[0]; |
1702 | ||
1703 | if (unlikely(len < 1)) { | |
1704 | drm_err(&xe->drm, "Invalid length %u", len); | |
1705 | return -EPROTO; | |
1706 | } | |
1707 | ||
9b9529ce FD |
1708 | q = g2h_exec_queue_lookup(guc, guc_id); |
1709 | if (unlikely(!q)) | |
dd08ebf6 MB |
1710 | return -EPROTO; |
1711 | ||
17d28aa8 | 1712 | drm_dbg(&xe->drm, "Engine memory cat error: guc_id=%d", guc_id); |
9b9529ce | 1713 | trace_xe_exec_queue_memory_cat_error(q); |
dd08ebf6 MB |
1714 | |
1715 | /* Treat the same as engine reset */ | |
9b9529ce FD |
1716 | set_exec_queue_reset(q); |
1717 | if (!exec_queue_banned(q)) | |
1718 | xe_guc_exec_queue_trigger_cleanup(q); | |
dd08ebf6 MB |
1719 | |
1720 | return 0; | |
1721 | } | |
1722 | ||
9b9529ce | 1723 | int xe_guc_exec_queue_reset_failure_handler(struct xe_guc *guc, u32 *msg, u32 len) |
dd08ebf6 MB |
1724 | { |
1725 | struct xe_device *xe = guc_to_xe(guc); | |
1726 | u8 guc_class, instance; | |
1727 | u32 reason; | |
1728 | ||
1729 | if (unlikely(len != 3)) { | |
1730 | drm_err(&xe->drm, "Invalid length %u", len); | |
1731 | return -EPROTO; | |
1732 | } | |
1733 | ||
1734 | guc_class = msg[0]; | |
1735 | instance = msg[1]; | |
1736 | reason = msg[2]; | |
1737 | ||
1738 | /* Unexpected failure of a hardware feature, log an actual error */ | |
1739 | drm_err(&xe->drm, "GuC engine reset request failed on %d:%d because 0x%08X", | |
1740 | guc_class, instance, reason); | |
1741 | ||
1742 | xe_gt_reset_async(guc_to_gt(guc)); | |
1743 | ||
1744 | return 0; | |
1745 | } | |
1746 | ||
bbdf97c1 | 1747 | static void |
9b9529ce FD |
1748 | guc_exec_queue_wq_snapshot_capture(struct xe_exec_queue *q, |
1749 | struct xe_guc_submit_exec_queue_snapshot *snapshot) | |
dd08ebf6 | 1750 | { |
9b9529ce | 1751 | struct xe_guc *guc = exec_queue_to_guc(q); |
dd08ebf6 | 1752 | struct xe_device *xe = guc_to_xe(guc); |
9b9529ce | 1753 | struct iosys_map map = xe_lrc_parallel_map(q->lrc); |
dd08ebf6 MB |
1754 | int i; |
1755 | ||
9b9529ce FD |
1756 | snapshot->guc.wqi_head = q->guc->wqi_head; |
1757 | snapshot->guc.wqi_tail = q->guc->wqi_tail; | |
bbdf97c1 RV |
1758 | snapshot->parallel.wq_desc.head = parallel_read(xe, map, wq_desc.head); |
1759 | snapshot->parallel.wq_desc.tail = parallel_read(xe, map, wq_desc.tail); | |
1760 | snapshot->parallel.wq_desc.status = parallel_read(xe, map, | |
1761 | wq_desc.wq_status); | |
1762 | ||
1763 | if (snapshot->parallel.wq_desc.head != | |
1764 | snapshot->parallel.wq_desc.tail) { | |
1765 | for (i = snapshot->parallel.wq_desc.head; | |
1766 | i != snapshot->parallel.wq_desc.tail; | |
1767 | i = (i + sizeof(u32)) % WQ_SIZE) | |
1768 | snapshot->parallel.wq[i / sizeof(u32)] = | |
1769 | parallel_read(xe, map, wq[i / sizeof(u32)]); | |
1770 | } | |
1771 | } | |
1772 | ||
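The work-queue head and tail captured above are byte offsets into a WQ_SIZE-byte ring of u32 work items, which is why the copy loop steps by sizeof(u32) modulo WQ_SIZE. The same walk, isolated (example_walk_wq and its visit callback are illustrative only):

static void example_walk_wq(u32 head, u32 tail, void (*visit)(u32 word))
{
	u32 i;

	/* head == tail means the ring is empty; offsets wrap at WQ_SIZE */
	for (i = head; i != tail; i = (i + sizeof(u32)) % WQ_SIZE)
		visit(i / sizeof(u32));		/* word index into wq[] */
}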
1773 | static void | |
9b9529ce FD |
1774 | guc_exec_queue_wq_snapshot_print(struct xe_guc_submit_exec_queue_snapshot *snapshot, |
1775 | struct drm_printer *p) | |
bbdf97c1 RV |
1776 | { |
1777 | int i; | |
1778 | ||
dd08ebf6 | 1779 | drm_printf(p, "\tWQ head: %u (internal), %d (memory)\n", |
bbdf97c1 | 1780 | snapshot->guc.wqi_head, snapshot->parallel.wq_desc.head); |
dd08ebf6 | 1781 | drm_printf(p, "\tWQ tail: %u (internal), %d (memory)\n", |
bbdf97c1 RV |
1782 | snapshot->guc.wqi_tail, snapshot->parallel.wq_desc.tail); |
1783 | drm_printf(p, "\tWQ status: %u\n", snapshot->parallel.wq_desc.status); | |
1784 | ||
1785 | if (snapshot->parallel.wq_desc.head != | |
1786 | snapshot->parallel.wq_desc.tail) { | |
1787 | for (i = snapshot->parallel.wq_desc.head; | |
1788 | i != snapshot->parallel.wq_desc.tail; | |
dd08ebf6 | 1789 | i = (i + sizeof(u32)) % WQ_SIZE) |
857912c3 | 1790 | drm_printf(p, "\tWQ[%zu]: 0x%08x\n", i / sizeof(u32), |
bbdf97c1 | 1791 | snapshot->parallel.wq[i / sizeof(u32)]); |
dd08ebf6 MB |
1792 | } |
1793 | } | |
1794 | ||
bbdf97c1 | 1795 | /** |
9b9529ce FD |
1796 | * xe_guc_exec_queue_snapshot_capture - Take a quick snapshot of the GuC Engine. |
1797 | * @q: Xe exec queue. | |
bbdf97c1 RV |
1798 | * |
 1799 | * This can be printed out at a later stage, such as during devcoredump |
 1800 | * analysis. |
1801 | * | |
1802 | * Returns: a GuC Submit Engine snapshot object that must be freed by the | |
9b9529ce | 1803 | * caller, using `xe_guc_exec_queue_snapshot_free`. |
bbdf97c1 | 1804 | */ |
9b9529ce FD |
1805 | struct xe_guc_submit_exec_queue_snapshot * |
1806 | xe_guc_exec_queue_snapshot_capture(struct xe_exec_queue *q) | |
dd08ebf6 | 1807 | { |
9b9529ce | 1808 | struct xe_guc *guc = exec_queue_to_guc(q); |
bbdf97c1 | 1809 | struct xe_device *xe = guc_to_xe(guc); |
9b9529ce | 1810 | struct xe_gpu_scheduler *sched = &q->guc->sched; |
dd08ebf6 | 1811 | struct xe_sched_job *job; |
9b9529ce | 1812 | struct xe_guc_submit_exec_queue_snapshot *snapshot; |
bbdf97c1 RV |
1813 | int i; |
1814 | ||
1815 | snapshot = kzalloc(sizeof(*snapshot), GFP_ATOMIC); | |
1816 | ||
1817 | if (!snapshot) { | |
1818 | drm_err(&xe->drm, "Skipping GuC Engine snapshot entirely.\n"); | |
1819 | return NULL; | |
1820 | } | |
1821 | ||
9b9529ce FD |
1822 | snapshot->guc.id = q->guc->id; |
1823 | memcpy(&snapshot->name, &q->name, sizeof(snapshot->name)); | |
1824 | snapshot->class = q->class; | |
1825 | snapshot->logical_mask = q->logical_mask; | |
1826 | snapshot->width = q->width; | |
1827 | snapshot->refcount = kref_read(&q->refcount); | |
bbdf97c1 | 1828 | snapshot->sched_timeout = sched->base.timeout; |
9b9529ce | 1829 | snapshot->sched_props.timeslice_us = q->sched_props.timeslice_us; |
bbdf97c1 | 1830 | snapshot->sched_props.preempt_timeout_us = |
9b9529ce | 1831 | q->sched_props.preempt_timeout_us; |
bbdf97c1 | 1832 | |
9b9529ce | 1833 | snapshot->lrc = kmalloc_array(q->width, sizeof(struct lrc_snapshot), |
bbdf97c1 RV |
1834 | GFP_ATOMIC); |
1835 | ||
1836 | if (!snapshot->lrc) { | |
1837 | drm_err(&xe->drm, "Skipping GuC Engine LRC snapshot.\n"); | |
1838 | } else { | |
9b9529ce FD |
1839 | for (i = 0; i < q->width; ++i) { |
1840 | struct xe_lrc *lrc = q->lrc + i; | |
bbdf97c1 RV |
1841 | |
1842 | snapshot->lrc[i].context_desc = | |
1843 | lower_32_bits(xe_lrc_ggtt_addr(lrc)); | |
1844 | snapshot->lrc[i].head = xe_lrc_ring_head(lrc); | |
1845 | snapshot->lrc[i].tail.internal = lrc->ring.tail; | |
1846 | snapshot->lrc[i].tail.memory = | |
1847 | xe_lrc_read_ctx_reg(lrc, CTX_RING_TAIL); | |
1848 | snapshot->lrc[i].start_seqno = xe_lrc_start_seqno(lrc); | |
1849 | snapshot->lrc[i].seqno = xe_lrc_seqno(lrc); | |
1850 | } | |
1851 | } | |
1852 | ||
9b9529ce FD |
1853 | snapshot->schedule_state = atomic_read(&q->guc->state); |
1854 | snapshot->exec_queue_flags = q->flags; | |
bbdf97c1 | 1855 | |
9b9529ce | 1856 | snapshot->parallel_execution = xe_exec_queue_is_parallel(q); |
bbdf97c1 | 1857 | if (snapshot->parallel_execution) |
9b9529ce | 1858 | guc_exec_queue_wq_snapshot_capture(q, snapshot); |
bbdf97c1 RV |
1859 | |
1860 | spin_lock(&sched->base.job_list_lock); | |
1861 | snapshot->pending_list_size = list_count_nodes(&sched->base.pending_list); | |
1862 | snapshot->pending_list = kmalloc_array(snapshot->pending_list_size, | |
1863 | sizeof(struct pending_list_snapshot), | |
1864 | GFP_ATOMIC); | |
1865 | ||
1866 | if (!snapshot->pending_list) { | |
1867 | drm_err(&xe->drm, "Skipping GuC Engine pending_list snapshot.\n"); | |
1868 | } else { | |
1869 | i = 0; | |
1870 | list_for_each_entry(job, &sched->base.pending_list, drm.list) { | |
1871 | snapshot->pending_list[i].seqno = | |
1872 | xe_sched_job_seqno(job); | |
1873 | snapshot->pending_list[i].fence = | |
1874 | dma_fence_is_signaled(job->fence) ? 1 : 0; | |
1875 | snapshot->pending_list[i].finished = | |
1876 | dma_fence_is_signaled(&job->drm.s_fence->finished) | |
1877 | ? 1 : 0; | |
1878 | i++; | |
1879 | } | |
1880 | } | |
1881 | ||
1882 | spin_unlock(&sched->base.job_list_lock); | |
1883 | ||
1884 | return snapshot; | |
1885 | } | |
1886 | ||
1887 | /** | |
9b9529ce | 1888 | * xe_guc_exec_queue_snapshot_print - Print out a given GuC Engine snapshot. |
bbdf97c1 RV |
1889 | * @snapshot: GuC Submit Engine snapshot object. |
1890 | * @p: drm_printer where it will be printed out. | |
1891 | * | |
1892 | * This function prints out a given GuC Submit Engine snapshot object. | |
1893 | */ | |
1894 | void | |
9b9529ce FD |
1895 | xe_guc_exec_queue_snapshot_print(struct xe_guc_submit_exec_queue_snapshot *snapshot, |
1896 | struct drm_printer *p) | |
bbdf97c1 | 1897 | { |
dd08ebf6 MB |
1898 | int i; |
1899 | ||
bbdf97c1 RV |
1900 | if (!snapshot) |
1901 | return; | |
1902 | ||
1903 | drm_printf(p, "\nGuC ID: %d\n", snapshot->guc.id); | |
1904 | drm_printf(p, "\tName: %s\n", snapshot->name); | |
1905 | drm_printf(p, "\tClass: %d\n", snapshot->class); | |
1906 | drm_printf(p, "\tLogical mask: 0x%x\n", snapshot->logical_mask); | |
1907 | drm_printf(p, "\tWidth: %d\n", snapshot->width); | |
1908 | drm_printf(p, "\tRef: %d\n", snapshot->refcount); | |
1909 | drm_printf(p, "\tTimeout: %ld (ms)\n", snapshot->sched_timeout); | |
1910 | drm_printf(p, "\tTimeslice: %u (us)\n", | |
1911 | snapshot->sched_props.timeslice_us); | |
dd08ebf6 | 1912 | drm_printf(p, "\tPreempt timeout: %u (us)\n", |
bbdf97c1 | 1913 | snapshot->sched_props.preempt_timeout_us); |
dd08ebf6 | 1914 | |
bbdf97c1 | 1915 | for (i = 0; snapshot->lrc && i < snapshot->width; ++i) { |
dd08ebf6 | 1916 | drm_printf(p, "\tHW Context Desc: 0x%08x\n", |
bbdf97c1 | 1917 | snapshot->lrc[i].context_desc); |
dd08ebf6 | 1918 | drm_printf(p, "\tLRC Head: (memory) %u\n", |
bbdf97c1 | 1919 | snapshot->lrc[i].head); |
dd08ebf6 | 1920 | drm_printf(p, "\tLRC Tail: (internal) %u, (memory) %u\n", |
bbdf97c1 RV |
1921 | snapshot->lrc[i].tail.internal, |
1922 | snapshot->lrc[i].tail.memory); | |
dd08ebf6 | 1923 | drm_printf(p, "\tStart seqno: (memory) %d\n", |
bbdf97c1 RV |
1924 | snapshot->lrc[i].start_seqno); |
1925 | drm_printf(p, "\tSeqno: (memory) %d\n", snapshot->lrc[i].seqno); | |
dd08ebf6 | 1926 | } |
bbdf97c1 | 1927 | drm_printf(p, "\tSchedule State: 0x%x\n", snapshot->schedule_state); |
9b9529ce | 1928 | drm_printf(p, "\tFlags: 0x%lx\n", snapshot->exec_queue_flags); |
dd08ebf6 | 1929 | |
bbdf97c1 | 1930 | if (snapshot->parallel_execution) |
9b9529ce | 1931 | guc_exec_queue_wq_snapshot_print(snapshot, p); |
1825c492 | 1932 | |
bbdf97c1 RV |
1933 | for (i = 0; snapshot->pending_list && i < snapshot->pending_list_size; |
1934 | i++) | |
dd08ebf6 | 1935 | drm_printf(p, "\tJob: seqno=%d, fence=%d, finished=%d\n", |
bbdf97c1 RV |
1936 | snapshot->pending_list[i].seqno, |
1937 | snapshot->pending_list[i].fence, | |
1938 | snapshot->pending_list[i].finished); | |
1939 | } | |
1940 | ||
1941 | /** | |
9b9529ce | 1942 | * xe_guc_exec_queue_snapshot_free - Free all allocated objects for a given |
bbdf97c1 RV |
1943 | * snapshot. |
1944 | * @snapshot: GuC Submit Engine snapshot object. | |
1945 | * | |
 1946 | * This function frees all the memory that was allocated at capture |
 1947 | * time. |
1948 | */ | |
9b9529ce | 1949 | void xe_guc_exec_queue_snapshot_free(struct xe_guc_submit_exec_queue_snapshot *snapshot) |
bbdf97c1 RV |
1950 | { |
1951 | if (!snapshot) | |
1952 | return; | |
1953 | ||
1954 | kfree(snapshot->lrc); | |
1955 | kfree(snapshot->pending_list); | |
1956 | kfree(snapshot); | |
dd08ebf6 MB |
1957 | } |
1958 | ||
9b9529ce | 1959 | static void guc_exec_queue_print(struct xe_exec_queue *q, struct drm_printer *p) |
bbdf97c1 | 1960 | { |
9b9529ce | 1961 | struct xe_guc_submit_exec_queue_snapshot *snapshot; |
bbdf97c1 | 1962 | |
9b9529ce FD |
1963 | snapshot = xe_guc_exec_queue_snapshot_capture(q); |
1964 | xe_guc_exec_queue_snapshot_print(snapshot, p); | |
1965 | xe_guc_exec_queue_snapshot_free(snapshot); | |
bbdf97c1 RV |
1966 | } |
1967 | ||
1968 | /** | |
1969 | * xe_guc_submit_print - GuC Submit Print. | |
1970 | * @guc: GuC. | |
1971 | * @p: drm_printer where it will be printed out. | |
1972 | * | |
 1973 | * This function captures and prints snapshots of **all** GuC Engines. |
1974 | */ | |
dd08ebf6 MB |
1975 | void xe_guc_submit_print(struct xe_guc *guc, struct drm_printer *p) |
1976 | { | |
9b9529ce | 1977 | struct xe_exec_queue *q; |
dd08ebf6 MB |
1978 | unsigned long index; |
1979 | ||
c4991ee0 | 1980 | if (!xe_device_uc_enabled(guc_to_xe(guc))) |
dd08ebf6 MB |
1981 | return; |
1982 | ||
1983 | mutex_lock(&guc->submission_state.lock); | |
9b9529ce FD |
1984 | xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) |
1985 | guc_exec_queue_print(q, p); | |
dd08ebf6 MB |
1986 | mutex_unlock(&guc->submission_state.lock); |
1987 | } |