Commit | Line | Data |
---|---|---|
dd08ebf6 MB |
1 | // SPDX-License-Identifier: MIT |
2 | /* | |
3 | * Copyright © 2022 Intel Corporation | |
4 | */ | |
5 | ||
ea9f879d LDM |
6 | #include "xe_guc_submit.h" |
7 | ||
dd08ebf6 MB |
8 | #include <linux/bitfield.h> |
9 | #include <linux/bitmap.h> | |
10 | #include <linux/circ_buf.h> | |
11 | #include <linux/delay.h> | |
12 | #include <linux/dma-fence-array.h> | |
13 | ||
14 | #include <drm/drm_managed.h> | |
15 | ||
b67cb798 MW |
16 | #include "abi/guc_actions_abi.h" |
17 | #include "abi/guc_klvs_abi.h" | |
0992884d | 18 | #include "regs/xe_lrc_layout.h" |
c73acc1e | 19 | #include "xe_assert.h" |
e7994850 | 20 | #include "xe_devcoredump.h" |
dd08ebf6 | 21 | #include "xe_device.h" |
c22a4ed0 | 22 | #include "xe_exec_queue.h" |
ea9f879d LDM |
23 | #include "xe_force_wake.h" |
24 | #include "xe_gpu_scheduler.h" | |
25 | #include "xe_gt.h" | |
996da37f | 26 | #include "xe_gt_printk.h" |
dd08ebf6 MB |
27 | #include "xe_guc.h" |
28 | #include "xe_guc_ct.h" | |
9b9529ce | 29 | #include "xe_guc_exec_queue_types.h" |
1825c492 | 30 | #include "xe_guc_submit_types.h" |
dd08ebf6 MB |
31 | #include "xe_hw_engine.h" |
32 | #include "xe_hw_fence.h" | |
33 | #include "xe_lrc.h" | |
34 | #include "xe_macros.h" | |
35 | #include "xe_map.h" | |
36 | #include "xe_mocs.h" | |
37 | #include "xe_ring_ops_types.h" | |
38 | #include "xe_sched_job.h" | |
39 | #include "xe_trace.h" | |
40 | #include "xe_vm.h" | |
41 | ||
dd08ebf6 | 42 | static struct xe_guc * |
9b9529ce | 43 | exec_queue_to_guc(struct xe_exec_queue *q) |
dd08ebf6 | 44 | { |
9b9529ce | 45 | return &q->gt->uc.guc; |
dd08ebf6 MB |
46 | } |
47 | ||
48 | /* | |
49 | * Helpers for engine state, using an atomic as some of the bits can transition | |
50 | * at the same time (e.g. a suspend can be happening at the same time as a |
51 | * schedule-engine-done message being processed). |
52 | */ | |
9b9529ce | 53 | #define EXEC_QUEUE_STATE_REGISTERED (1 << 0) |
dd08ebf6 | 54 | #define ENGINE_STATE_ENABLED (1 << 1) |
9b9529ce FD |
55 | #define EXEC_QUEUE_STATE_PENDING_ENABLE (1 << 2) |
56 | #define EXEC_QUEUE_STATE_PENDING_DISABLE (1 << 3) | |
57 | #define EXEC_QUEUE_STATE_DESTROYED (1 << 4) | |
dd08ebf6 | 58 | #define ENGINE_STATE_SUSPENDED (1 << 5) |
9b9529ce | 59 | #define EXEC_QUEUE_STATE_RESET (1 << 6) |
dd08ebf6 MB |
60 | #define ENGINE_STATE_KILLED (1 << 7) |
61 | ||
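/*
 * All of these state bits live in the single atomic q->guc->state, and the
 * helpers below only ever touch it with atomic_read()/atomic_or()/
 * atomic_and(), so individual bits can flip concurrently without a lock.
 * The mixed ENGINE_STATE_/EXEC_QUEUE_STATE_ prefixes name bits in the same
 * mask.
 */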
9b9529ce | 62 | static bool exec_queue_registered(struct xe_exec_queue *q) |
dd08ebf6 | 63 | { |
9b9529ce | 64 | return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_REGISTERED; |
dd08ebf6 MB |
65 | } |
66 | ||
9b9529ce | 67 | static void set_exec_queue_registered(struct xe_exec_queue *q) |
dd08ebf6 | 68 | { |
9b9529ce | 69 | atomic_or(EXEC_QUEUE_STATE_REGISTERED, &q->guc->state); |
dd08ebf6 MB |
70 | } |
71 | ||
9b9529ce | 72 | static void clear_exec_queue_registered(struct xe_exec_queue *q) |
dd08ebf6 | 73 | { |
9b9529ce | 74 | atomic_and(~EXEC_QUEUE_STATE_REGISTERED, &q->guc->state); |
dd08ebf6 MB |
75 | } |
76 | ||
9b9529ce | 77 | static bool exec_queue_enabled(struct xe_exec_queue *q) |
dd08ebf6 | 78 | { |
9b9529ce | 79 | return atomic_read(&q->guc->state) & ENGINE_STATE_ENABLED; |
dd08ebf6 MB |
80 | } |
81 | ||
9b9529ce | 82 | static void set_exec_queue_enabled(struct xe_exec_queue *q) |
dd08ebf6 | 83 | { |
9b9529ce | 84 | atomic_or(ENGINE_STATE_ENABLED, &q->guc->state); |
dd08ebf6 MB |
85 | } |
86 | ||
9b9529ce | 87 | static void clear_exec_queue_enabled(struct xe_exec_queue *q) |
dd08ebf6 | 88 | { |
9b9529ce | 89 | atomic_and(~ENGINE_STATE_ENABLED, &q->guc->state); |
dd08ebf6 MB |
90 | } |
91 | ||
9b9529ce | 92 | static bool exec_queue_pending_enable(struct xe_exec_queue *q) |
dd08ebf6 | 93 | { |
9b9529ce | 94 | return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_PENDING_ENABLE; |
dd08ebf6 MB |
95 | } |
96 | ||
9b9529ce | 97 | static void set_exec_queue_pending_enable(struct xe_exec_queue *q) |
dd08ebf6 | 98 | { |
9b9529ce | 99 | atomic_or(EXEC_QUEUE_STATE_PENDING_ENABLE, &q->guc->state); |
dd08ebf6 MB |
100 | } |
101 | ||
9b9529ce | 102 | static void clear_exec_queue_pending_enable(struct xe_exec_queue *q) |
dd08ebf6 | 103 | { |
9b9529ce | 104 | atomic_and(~EXEC_QUEUE_STATE_PENDING_ENABLE, &q->guc->state); |
dd08ebf6 MB |
105 | } |
106 | ||
9b9529ce | 107 | static bool exec_queue_pending_disable(struct xe_exec_queue *q) |
dd08ebf6 | 108 | { |
9b9529ce | 109 | return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_PENDING_DISABLE; |
dd08ebf6 MB |
110 | } |
111 | ||
9b9529ce | 112 | static void set_exec_queue_pending_disable(struct xe_exec_queue *q) |
dd08ebf6 | 113 | { |
9b9529ce | 114 | atomic_or(EXEC_QUEUE_STATE_PENDING_DISABLE, &q->guc->state); |
dd08ebf6 MB |
115 | } |
116 | ||
9b9529ce | 117 | static void clear_exec_queue_pending_disable(struct xe_exec_queue *q) |
dd08ebf6 | 118 | { |
9b9529ce | 119 | atomic_and(~EXEC_QUEUE_STATE_PENDING_DISABLE, &q->guc->state); |
dd08ebf6 MB |
120 | } |
121 | ||
9b9529ce | 122 | static bool exec_queue_destroyed(struct xe_exec_queue *q) |
dd08ebf6 | 123 | { |
9b9529ce | 124 | return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_DESTROYED; |
dd08ebf6 MB |
125 | } |
126 | ||
9b9529ce | 127 | static void set_exec_queue_destroyed(struct xe_exec_queue *q) |
dd08ebf6 | 128 | { |
9b9529ce | 129 | atomic_or(EXEC_QUEUE_STATE_DESTROYED, &q->guc->state); |
dd08ebf6 MB |
130 | } |
131 | ||
9b9529ce | 132 | static bool exec_queue_banned(struct xe_exec_queue *q) |
dd08ebf6 | 133 | { |
9b9529ce | 134 | return (q->flags & EXEC_QUEUE_FLAG_BANNED); |
dd08ebf6 MB |
135 | } |
136 | ||
9b9529ce | 137 | static void set_exec_queue_banned(struct xe_exec_queue *q) |
dd08ebf6 | 138 | { |
9b9529ce | 139 | q->flags |= EXEC_QUEUE_FLAG_BANNED; |
dd08ebf6 MB |
140 | } |
141 | ||
9b9529ce | 142 | static bool exec_queue_suspended(struct xe_exec_queue *q) |
dd08ebf6 | 143 | { |
9b9529ce | 144 | return atomic_read(&q->guc->state) & ENGINE_STATE_SUSPENDED; |
dd08ebf6 MB |
145 | } |
146 | ||
9b9529ce | 147 | static void set_exec_queue_suspended(struct xe_exec_queue *q) |
dd08ebf6 | 148 | { |
9b9529ce | 149 | atomic_or(ENGINE_STATE_SUSPENDED, &q->guc->state); |
dd08ebf6 MB |
150 | } |
151 | ||
9b9529ce | 152 | static void clear_exec_queue_suspended(struct xe_exec_queue *q) |
dd08ebf6 | 153 | { |
9b9529ce | 154 | atomic_and(~ENGINE_STATE_SUSPENDED, &q->guc->state); |
dd08ebf6 MB |
155 | } |
156 | ||
9b9529ce | 157 | static bool exec_queue_reset(struct xe_exec_queue *q) |
dd08ebf6 | 158 | { |
9b9529ce | 159 | return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_RESET; |
dd08ebf6 MB |
160 | } |
161 | ||
9b9529ce | 162 | static void set_exec_queue_reset(struct xe_exec_queue *q) |
dd08ebf6 | 163 | { |
9b9529ce | 164 | atomic_or(EXEC_QUEUE_STATE_RESET, &q->guc->state); |
dd08ebf6 MB |
165 | } |
166 | ||
9b9529ce | 167 | static bool exec_queue_killed(struct xe_exec_queue *q) |
dd08ebf6 | 168 | { |
9b9529ce | 169 | return atomic_read(&q->guc->state) & ENGINE_STATE_KILLED; |
dd08ebf6 MB |
170 | } |
171 | ||
9b9529ce | 172 | static void set_exec_queue_killed(struct xe_exec_queue *q) |
dd08ebf6 | 173 | { |
9b9529ce | 174 | atomic_or(ENGINE_STATE_KILLED, &q->guc->state); |
dd08ebf6 MB |
175 | } |
176 | ||
9b9529ce | 177 | static bool exec_queue_killed_or_banned(struct xe_exec_queue *q) |
dd08ebf6 | 178 | { |
9b9529ce | 179 | return exec_queue_killed(q) || exec_queue_banned(q); |
dd08ebf6 MB |
180 | } |
181 | ||
a839e365 MB |
182 | #ifdef CONFIG_PROVE_LOCKING |
183 | static int alloc_submit_wq(struct xe_guc *guc) | |
184 | { | |
185 | int i; | |
186 | ||
187 | for (i = 0; i < NUM_SUBMIT_WQ; ++i) { | |
188 | guc->submission_state.submit_wq_pool[i] = | |
189 | alloc_ordered_workqueue("submit_wq", 0); | |
190 | if (!guc->submission_state.submit_wq_pool[i]) | |
191 | goto err_free; | |
192 | } | |
193 | ||
194 | return 0; | |
195 | ||
196 | err_free: | |
197 | while (i) | |
198 | destroy_workqueue(guc->submission_state.submit_wq_pool[--i]); | |
199 | ||
200 | return -ENOMEM; | |
201 | } | |
202 | ||
203 | static void free_submit_wq(struct xe_guc *guc) | |
204 | { | |
205 | int i; | |
206 | ||
207 | for (i = 0; i < NUM_SUBMIT_WQ; ++i) | |
208 | destroy_workqueue(guc->submission_state.submit_wq_pool[i]); | |
209 | } | |
210 | ||
211 | static struct workqueue_struct *get_submit_wq(struct xe_guc *guc) | |
212 | { | |
213 | int idx = guc->submission_state.submit_wq_idx++ % NUM_SUBMIT_WQ; | |
214 | ||
215 | return guc->submission_state.submit_wq_pool[idx]; | |
216 | } | |
217 | #else | |
218 | static int alloc_submit_wq(struct xe_guc *guc) | |
219 | { | |
220 | return 0; | |
221 | } | |
222 | ||
223 | static void free_submit_wq(struct xe_guc *guc) | |
224 | { | |
225 | ||
226 | } | |
227 | ||
228 | static struct workqueue_struct *get_submit_wq(struct xe_guc *guc) | |
229 | { | |
230 | return NULL; | |
231 | } | |
232 | #endif | |
233 | ||
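/*
 * Rationale for the pool above (inferred from the CONFIG_PROVE_LOCKING
 * guard rather than stated in the code): when a NULL workqueue is handed
 * to xe_sched_init(), each DRM scheduler allocates its own ordered
 * workqueue, and with lockdep enabled every such workqueue carries its own
 * lock class. Round-robining NUM_SUBMIT_WQ pre-allocated ordered
 * workqueues across exec queues keeps the number of lock classes bounded.
 */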
dd08ebf6 MB |
234 | static void guc_submit_fini(struct drm_device *drm, void *arg) |
235 | { | |
236 | struct xe_guc *guc = arg; | |
237 | ||
9b9529ce | 238 | xa_destroy(&guc->submission_state.exec_queue_lookup); |
dd08ebf6 MB |
239 | ida_destroy(&guc->submission_state.guc_ids); |
240 | bitmap_free(guc->submission_state.guc_ids_bitmap); | |
a839e365 | 241 | free_submit_wq(guc); |
28b1d915 | 242 | mutex_destroy(&guc->submission_state.lock); |
dd08ebf6 MB |
243 | } |
244 | ||
245 | #define GUC_ID_MAX 65535 | |
246 | #define GUC_ID_NUMBER_MLRC 4096 | |
247 | #define GUC_ID_NUMBER_SLRC (GUC_ID_MAX - GUC_ID_NUMBER_MLRC) | |
248 | #define GUC_ID_START_MLRC GUC_ID_NUMBER_SLRC | |
249 | ||
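/*
 * Worked out: GUC_ID_NUMBER_SLRC = 65535 - 4096 = 61439, so single-LRC
 * queues draw ids [0, 61438] from an ida, while parallel (multi-LRC)
 * queues draw power-of-two blocks from a 4096-bit bitmap and then have
 * GUC_ID_START_MLRC = 61439 added, landing in [61439, 65534]. See
 * alloc_guc_id() below.
 */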
9b9529ce | 250 | static const struct xe_exec_queue_ops guc_exec_queue_ops; |
dd08ebf6 MB |
251 | |
252 | static void primelockdep(struct xe_guc *guc) | |
253 | { | |
254 | if (!IS_ENABLED(CONFIG_LOCKDEP)) | |
255 | return; | |
256 | ||
257 | fs_reclaim_acquire(GFP_KERNEL); | |
258 | ||
259 | mutex_lock(&guc->submission_state.lock); | |
260 | might_lock(&guc->submission_state.suspend.lock); | |
261 | mutex_unlock(&guc->submission_state.lock); | |
262 | ||
263 | fs_reclaim_release(GFP_KERNEL); | |
264 | } | |
265 | ||
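/*
 * primelockdep() records the expected ordering up front: submission_state.lock
 * can be taken inside an fs_reclaim section, and suspend.lock nests under
 * submission_state.lock. Any later inversion is then flagged by lockdep
 * immediately instead of only in the rare path that actually hits it.
 */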
266 | int xe_guc_submit_init(struct xe_guc *guc) | |
267 | { | |
268 | struct xe_device *xe = guc_to_xe(guc); | |
269 | struct xe_gt *gt = guc_to_gt(guc); | |
270 | int err; | |
271 | ||
272 | guc->submission_state.guc_ids_bitmap = | |
273 | bitmap_zalloc(GUC_ID_NUMBER_MLRC, GFP_KERNEL); | |
274 | if (!guc->submission_state.guc_ids_bitmap) | |
275 | return -ENOMEM; | |
276 | ||
a839e365 MB |
277 | err = alloc_submit_wq(guc); |
278 | if (err) { | |
279 | bitmap_free(guc->submission_state.guc_ids_bitmap); | |
280 | return err; | |
281 | } | |
282 | ||
9b9529ce | 283 | gt->exec_queue_ops = &guc_exec_queue_ops; |
dd08ebf6 MB |
284 | |
285 | mutex_init(&guc->submission_state.lock); | |
9b9529ce | 286 | xa_init(&guc->submission_state.exec_queue_lookup); |
dd08ebf6 MB |
287 | ida_init(&guc->submission_state.guc_ids); |
288 | ||
289 | spin_lock_init(&guc->submission_state.suspend.lock); | |
290 | guc->submission_state.suspend.context = dma_fence_context_alloc(1); | |
291 | ||
292 | primelockdep(guc); | |
293 | ||
294 | err = drmm_add_action_or_reset(&xe->drm, guc_submit_fini, guc); | |
295 | if (err) | |
296 | return err; | |
297 | ||
298 | return 0; | |
299 | } | |
300 | ||
cb90d469 DCS |
301 | static void __release_guc_id(struct xe_guc *guc, struct xe_exec_queue *q, u32 xa_count) |
302 | { | |
303 | int i; | |
304 | ||
305 | lockdep_assert_held(&guc->submission_state.lock); | |
306 | ||
307 | for (i = 0; i < xa_count; ++i) | |
308 | xa_erase(&guc->submission_state.exec_queue_lookup, q->guc->id + i); | |
309 | ||
310 | if (xe_exec_queue_is_parallel(q)) | |
311 | bitmap_release_region(guc->submission_state.guc_ids_bitmap, | |
312 | q->guc->id - GUC_ID_START_MLRC, | |
313 | order_base_2(q->width)); | |
314 | else | |
69a5f177 | 315 | ida_free(&guc->submission_state.guc_ids, q->guc->id); |
cb90d469 DCS |
316 | } |
317 | ||
9b9529ce | 318 | static int alloc_guc_id(struct xe_guc *guc, struct xe_exec_queue *q) |
dd08ebf6 MB |
319 | { |
320 | int ret; | |
321 | void *ptr; | |
cb90d469 | 322 | int i; |
dd08ebf6 MB |
323 | |
324 | /* | |
325 | * Must use GFP_NOWAIT as this lock is in the dma fence signalling path, | |
326 | * worst case the user gets -ENOMEM on engine create and has to try again. |
327 | * |
328 | * FIXME: Have caller pre-alloc or post-alloc with GFP_KERNEL to prevent |
329 | * failure. | |
330 | */ | |
331 | lockdep_assert_held(&guc->submission_state.lock); | |
332 | ||
9b9529ce | 333 | if (xe_exec_queue_is_parallel(q)) { |
dd08ebf6 MB |
334 | void *bitmap = guc->submission_state.guc_ids_bitmap; |
335 | ||
336 | ret = bitmap_find_free_region(bitmap, GUC_ID_NUMBER_MLRC, | |
9b9529ce | 337 | order_base_2(q->width)); |
dd08ebf6 | 338 | } else { |
69a5f177 CJ |
339 | ret = ida_alloc_max(&guc->submission_state.guc_ids, |
340 | GUC_ID_NUMBER_SLRC - 1, GFP_NOWAIT); | |
dd08ebf6 MB |
341 | } |
342 | if (ret < 0) | |
343 | return ret; | |
344 | ||
9b9529ce FD |
345 | q->guc->id = ret; |
346 | if (xe_exec_queue_is_parallel(q)) | |
347 | q->guc->id += GUC_ID_START_MLRC; | |
dd08ebf6 | 348 | |
cb90d469 DCS |
349 | for (i = 0; i < q->width; ++i) { |
350 | ptr = xa_store(&guc->submission_state.exec_queue_lookup, | |
351 | q->guc->id + i, q, GFP_NOWAIT); | |
352 | if (IS_ERR(ptr)) { | |
353 | ret = PTR_ERR(ptr); | |
354 | goto err_release; | |
355 | } | |
dd08ebf6 MB |
356 | } |
357 | ||
358 | return 0; | |
359 | ||
360 | err_release: | |
cb90d469 DCS |
361 | __release_guc_id(guc, q, i); |
362 | ||
dd08ebf6 MB |
363 | return ret; |
364 | } | |
365 | ||
9b9529ce | 366 | static void release_guc_id(struct xe_guc *guc, struct xe_exec_queue *q) |
dd08ebf6 MB |
367 | { |
368 | mutex_lock(&guc->submission_state.lock); | |
cb90d469 | 369 | __release_guc_id(guc, q, q->width); |
dd08ebf6 MB |
370 | mutex_unlock(&guc->submission_state.lock); |
371 | } | |
372 | ||
9b9529ce | 373 | struct exec_queue_policy { |
dd08ebf6 | 374 | u32 count; |
9b9529ce | 375 | struct guc_update_exec_queue_policy h2g; |
dd08ebf6 MB |
376 | }; |
377 | ||
9b9529ce | 378 | static u32 __guc_exec_queue_policy_action_size(struct exec_queue_policy *policy) |
dd08ebf6 MB |
379 | { |
380 | size_t bytes = sizeof(policy->h2g.header) + | |
381 | (sizeof(policy->h2g.klv[0]) * policy->count); | |
382 | ||
383 | return bytes / sizeof(u32); | |
384 | } | |
385 | ||
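/*
 * Worked example: each klv[] entry is a {kl, value} pair of two u32s, so
 * the three KLVs set by init_policies() below contribute 6 dwords.
 * Assuming the header is the usual two-dword action/guc_id pair (the exact
 * layout lives in the GuC ABI headers), the action sent is 8 dwords total.
 */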
9b9529ce FD |
386 | static void __guc_exec_queue_policy_start_klv(struct exec_queue_policy *policy, |
387 | u16 guc_id) | |
dd08ebf6 MB |
388 | { |
389 | policy->h2g.header.action = | |
390 | XE_GUC_ACTION_HOST2GUC_UPDATE_CONTEXT_POLICIES; | |
391 | policy->h2g.header.guc_id = guc_id; | |
392 | policy->count = 0; | |
393 | } | |
394 | ||
9b9529ce FD |
395 | #define MAKE_EXEC_QUEUE_POLICY_ADD(func, id) \ |
396 | static void __guc_exec_queue_policy_add_##func(struct exec_queue_policy *policy, \ | |
dd08ebf6 MB |
397 | u32 data) \ |
398 | { \ | |
99fea682 | 399 | XE_WARN_ON(policy->count >= GUC_CONTEXT_POLICIES_KLV_NUM_IDS); \ |
3e8e7ee6 | 400 | \ |
dd08ebf6 MB |
401 | policy->h2g.klv[policy->count].kl = \ |
402 | FIELD_PREP(GUC_KLV_0_KEY, \ | |
403 | GUC_CONTEXT_POLICIES_KLV_ID_##id) | \ | |
404 | FIELD_PREP(GUC_KLV_0_LEN, 1); \ | |
405 | policy->h2g.klv[policy->count].value = data; \ | |
406 | policy->count++; \ | |
407 | } | |
408 | ||
9b9529ce FD |
409 | MAKE_EXEC_QUEUE_POLICY_ADD(execution_quantum, EXECUTION_QUANTUM) |
410 | MAKE_EXEC_QUEUE_POLICY_ADD(preemption_timeout, PREEMPTION_TIMEOUT) | |
411 | MAKE_EXEC_QUEUE_POLICY_ADD(priority, SCHEDULING_PRIORITY) | |
412 | #undef MAKE_EXEC_QUEUE_POLICY_ADD | |
dd08ebf6 | 413 | |
9b9529ce FD |
414 | static const int xe_exec_queue_prio_to_guc[] = { |
415 | [XE_EXEC_QUEUE_PRIORITY_LOW] = GUC_CLIENT_PRIORITY_NORMAL, | |
416 | [XE_EXEC_QUEUE_PRIORITY_NORMAL] = GUC_CLIENT_PRIORITY_KMD_NORMAL, | |
417 | [XE_EXEC_QUEUE_PRIORITY_HIGH] = GUC_CLIENT_PRIORITY_HIGH, | |
418 | [XE_EXEC_QUEUE_PRIORITY_KERNEL] = GUC_CLIENT_PRIORITY_KMD_HIGH, | |
dd08ebf6 MB |
419 | }; |
420 | ||
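/*
 * The table above maps xe's four priority levels onto GuC client
 * priorities so that kernel queues land on KMD_HIGH, above any user level,
 * with user low mapped to the GuC's plain NORMAL. This preserves the
 * LOW < NORMAL < HIGH < KERNEL ordering, assuming the GuC ranks its client
 * priorities KMD_HIGH > HIGH > KMD_NORMAL > NORMAL as the names suggest.
 */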
9b9529ce | 421 | static void init_policies(struct xe_guc *guc, struct xe_exec_queue *q) |
dd08ebf6 | 422 | { |
9b9529ce | 423 | struct exec_queue_policy policy; |
c73acc1e | 424 | struct xe_device *xe = guc_to_xe(guc); |
a8004af3 | 425 | enum xe_exec_queue_priority prio = q->sched_props.priority; |
9b9529ce FD |
426 | u32 timeslice_us = q->sched_props.timeslice_us; |
427 | u32 preempt_timeout_us = q->sched_props.preempt_timeout_us; | |
dd08ebf6 | 428 | |
c73acc1e | 429 | xe_assert(xe, exec_queue_registered(q)); |
dd08ebf6 | 430 | |
9b9529ce FD |
431 | __guc_exec_queue_policy_start_klv(&policy, q->guc->id); |
432 | __guc_exec_queue_policy_add_priority(&policy, xe_exec_queue_prio_to_guc[prio]); | |
433 | __guc_exec_queue_policy_add_execution_quantum(&policy, timeslice_us); | |
434 | __guc_exec_queue_policy_add_preemption_timeout(&policy, preempt_timeout_us); | |
dd08ebf6 MB |
435 | |
436 | xe_guc_ct_send(&guc->ct, (u32 *)&policy.h2g, | |
9b9529ce | 437 | __guc_exec_queue_policy_action_size(&policy), 0, 0); |
dd08ebf6 MB |
438 | } |
439 | ||
9b9529ce | 440 | static void set_min_preemption_timeout(struct xe_guc *guc, struct xe_exec_queue *q) |
dd08ebf6 | 441 | { |
9b9529ce | 442 | struct exec_queue_policy policy; |
dd08ebf6 | 443 | |
9b9529ce FD |
444 | __guc_exec_queue_policy_start_klv(&policy, q->guc->id); |
445 | __guc_exec_queue_policy_add_preemption_timeout(&policy, 1); | |
dd08ebf6 MB |
446 | |
447 | xe_guc_ct_send(&guc->ct, (u32 *)&policy.h2g, | |
9b9529ce | 448 | __guc_exec_queue_policy_action_size(&policy), 0, 0); |
dd08ebf6 MB |
449 | } |
450 | ||
dd08ebf6 | 451 | #define parallel_read(xe_, map_, field_) \ |
1825c492 RV |
452 | xe_map_rd_field(xe_, &map_, 0, struct guc_submit_parallel_scratch, \ |
453 | field_) | |
dd08ebf6 | 454 | #define parallel_write(xe_, map_, field_, val_) \ |
1825c492 RV |
455 | xe_map_wr_field(xe_, &map_, 0, struct guc_submit_parallel_scratch, \ |
456 | field_, val_) | |
dd08ebf6 MB |
457 | |
458 | static void __register_mlrc_engine(struct xe_guc *guc, | |
9b9529ce | 459 | struct xe_exec_queue *q, |
dd08ebf6 MB |
460 | struct guc_ctxt_registration_info *info) |
461 | { | |
462 | #define MAX_MLRC_REG_SIZE (13 + XE_HW_ENGINE_MAX_INSTANCE * 2) | |
c73acc1e | 463 | struct xe_device *xe = guc_to_xe(guc); |
dd08ebf6 MB |
464 | u32 action[MAX_MLRC_REG_SIZE]; |
465 | int len = 0; | |
466 | int i; | |
467 | ||
c73acc1e | 468 | xe_assert(xe, xe_exec_queue_is_parallel(q)); |
dd08ebf6 MB |
469 | |
470 | action[len++] = XE_GUC_ACTION_REGISTER_CONTEXT_MULTI_LRC; | |
471 | action[len++] = info->flags; | |
472 | action[len++] = info->context_idx; | |
473 | action[len++] = info->engine_class; | |
474 | action[len++] = info->engine_submit_mask; | |
475 | action[len++] = info->wq_desc_lo; | |
476 | action[len++] = info->wq_desc_hi; | |
477 | action[len++] = info->wq_base_lo; | |
478 | action[len++] = info->wq_base_hi; | |
479 | action[len++] = info->wq_size; | |
9b9529ce | 480 | action[len++] = q->width; |
dd08ebf6 MB |
481 | action[len++] = info->hwlrca_lo; |
482 | action[len++] = info->hwlrca_hi; | |
483 | ||
9b9529ce FD |
484 | for (i = 1; i < q->width; ++i) { |
485 | struct xe_lrc *lrc = q->lrc + i; | |
dd08ebf6 MB |
486 | |
487 | action[len++] = lower_32_bits(xe_lrc_descriptor(lrc)); | |
488 | action[len++] = upper_32_bits(xe_lrc_descriptor(lrc)); | |
489 | } | |
490 | ||
c73acc1e | 491 | xe_assert(xe, len <= MAX_MLRC_REG_SIZE); |
dd08ebf6 MB |
492 | #undef MAX_MLRC_REG_SIZE |
493 | ||
494 | xe_guc_ct_send(&guc->ct, action, len, 0, 0); | |
495 | } | |
496 | ||
497 | static void __register_engine(struct xe_guc *guc, | |
498 | struct guc_ctxt_registration_info *info) | |
499 | { | |
500 | u32 action[] = { | |
501 | XE_GUC_ACTION_REGISTER_CONTEXT, | |
502 | info->flags, | |
503 | info->context_idx, | |
504 | info->engine_class, | |
505 | info->engine_submit_mask, | |
506 | info->wq_desc_lo, | |
507 | info->wq_desc_hi, | |
508 | info->wq_base_lo, | |
509 | info->wq_base_hi, | |
510 | info->wq_size, | |
511 | info->hwlrca_lo, | |
512 | info->hwlrca_hi, | |
513 | }; | |
514 | ||
515 | xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action), 0, 0); | |
516 | } | |
517 | ||
9b9529ce | 518 | static void register_engine(struct xe_exec_queue *q) |
dd08ebf6 | 519 | { |
9b9529ce | 520 | struct xe_guc *guc = exec_queue_to_guc(q); |
dd08ebf6 | 521 | struct xe_device *xe = guc_to_xe(guc); |
9b9529ce | 522 | struct xe_lrc *lrc = q->lrc; |
dd08ebf6 MB |
523 | struct guc_ctxt_registration_info info; |
524 | ||
c73acc1e | 525 | xe_assert(xe, !exec_queue_registered(q)); |
dd08ebf6 MB |
526 | |
527 | memset(&info, 0, sizeof(info)); | |
9b9529ce FD |
528 | info.context_idx = q->guc->id; |
529 | info.engine_class = xe_engine_class_to_guc_class(q->class); | |
530 | info.engine_submit_mask = q->logical_mask; | |
dd08ebf6 MB |
531 | info.hwlrca_lo = lower_32_bits(xe_lrc_descriptor(lrc)); |
532 | info.hwlrca_hi = upper_32_bits(xe_lrc_descriptor(lrc)); | |
533 | info.flags = CONTEXT_REGISTRATION_FLAG_KMD; | |
534 | ||
9b9529ce | 535 | if (xe_exec_queue_is_parallel(q)) { |
dd08ebf6 MB |
536 | u32 ggtt_addr = xe_lrc_parallel_ggtt_addr(lrc); |
537 | struct iosys_map map = xe_lrc_parallel_map(lrc); | |
538 | ||
539 | info.wq_desc_lo = lower_32_bits(ggtt_addr + | |
1825c492 | 540 | offsetof(struct guc_submit_parallel_scratch, wq_desc)); |
dd08ebf6 | 541 | info.wq_desc_hi = upper_32_bits(ggtt_addr + |
1825c492 | 542 | offsetof(struct guc_submit_parallel_scratch, wq_desc)); |
dd08ebf6 | 543 | info.wq_base_lo = lower_32_bits(ggtt_addr + |
1825c492 | 544 | offsetof(struct guc_submit_parallel_scratch, wq[0])); |
dd08ebf6 | 545 | info.wq_base_hi = upper_32_bits(ggtt_addr + |
1825c492 | 546 | offsetof(struct guc_submit_parallel_scratch, wq[0])); |
dd08ebf6 MB |
547 | info.wq_size = WQ_SIZE; |
548 | ||
9b9529ce FD |
549 | q->guc->wqi_head = 0; |
550 | q->guc->wqi_tail = 0; | |
dd08ebf6 MB |
551 | xe_map_memset(xe, &map, 0, 0, PARALLEL_SCRATCH_SIZE - WQ_SIZE); |
552 | parallel_write(xe, map, wq_desc.wq_status, WQ_STATUS_ACTIVE); | |
553 | } | |
554 | ||
8ae8a2e8 MB |
555 | /* |
556 | * We must keep a reference for LR engines while the engine is registered |
557 | * with the GuC, as jobs signal immediately and we can't destroy an engine |
558 | * while the GuC still holds a reference to it. |
559 | */ | |
9b9529ce FD |
560 | if (xe_exec_queue_is_lr(q)) |
561 | xe_exec_queue_get(q); | |
8ae8a2e8 | 562 | |
9b9529ce FD |
563 | set_exec_queue_registered(q); |
564 | trace_xe_exec_queue_register(q); | |
565 | if (xe_exec_queue_is_parallel(q)) | |
566 | __register_mlrc_engine(guc, q, &info); | |
dd08ebf6 MB |
567 | else |
568 | __register_engine(guc, &info); | |
9b9529ce | 569 | init_policies(guc, q); |
dd08ebf6 MB |
570 | } |
571 | ||
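/*
 * Registration summary: both paths place lane 0's LRC descriptor in
 * hwlrca_lo/hi; the multi-LRC variant additionally appends the descriptors
 * of lanes 1..width-1 and points wq_desc/wq_base at the software work
 * queue carved out of lane 0's parallel scratch page in the GGTT.
 */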
9b9529ce | 572 | static u32 wq_space_until_wrap(struct xe_exec_queue *q) |
dd08ebf6 | 573 | { |
9b9529ce | 574 | return (WQ_SIZE - q->guc->wqi_tail); |
dd08ebf6 MB |
575 | } |
576 | ||
9b9529ce | 577 | static int wq_wait_for_space(struct xe_exec_queue *q, u32 wqi_size) |
dd08ebf6 | 578 | { |
9b9529ce | 579 | struct xe_guc *guc = exec_queue_to_guc(q); |
dd08ebf6 | 580 | struct xe_device *xe = guc_to_xe(guc); |
9b9529ce | 581 | struct iosys_map map = xe_lrc_parallel_map(q->lrc); |
dd08ebf6 MB |
582 | unsigned int sleep_period_ms = 1; |
583 | ||
584 | #define AVAILABLE_SPACE \ | |
9b9529ce | 585 | CIRC_SPACE(q->guc->wqi_tail, q->guc->wqi_head, WQ_SIZE) |
dd08ebf6 MB |
586 | if (wqi_size > AVAILABLE_SPACE) { |
587 | try_again: | |
9b9529ce | 588 | q->guc->wqi_head = parallel_read(xe, map, wq_desc.head); |
dd08ebf6 MB |
589 | if (wqi_size > AVAILABLE_SPACE) { |
590 | if (sleep_period_ms == 1024) { | |
9b9529ce | 591 | xe_gt_reset_async(q->gt); |
dd08ebf6 MB |
592 | return -ENODEV; |
593 | } | |
594 | ||
595 | msleep(sleep_period_ms); | |
596 | sleep_period_ms <<= 1; | |
597 | goto try_again; | |
598 | } | |
599 | } | |
600 | #undef AVAILABLE_SPACE | |
601 | ||
602 | return 0; | |
603 | } | |
604 | ||
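/*
 * CIRC_SPACE() keeps one byte unused so that head == tail always means
 * empty (WQ_SIZE - 1 bytes free) and never full. wqi_tail is the producer
 * index; wq_desc.head, re-read from memory above, is the GuC's consumer
 * index. The wait doubles from 1 ms; once the period would reach 1024 ms
 * the GT is assumed hung, a reset is triggered and -ENODEV returned.
 */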
9b9529ce | 605 | static int wq_noop_append(struct xe_exec_queue *q) |
dd08ebf6 | 606 | { |
9b9529ce | 607 | struct xe_guc *guc = exec_queue_to_guc(q); |
dd08ebf6 | 608 | struct xe_device *xe = guc_to_xe(guc); |
9b9529ce FD |
609 | struct iosys_map map = xe_lrc_parallel_map(q->lrc); |
610 | u32 len_dw = wq_space_until_wrap(q) / sizeof(u32) - 1; | |
dd08ebf6 | 611 | |
9b9529ce | 612 | if (wq_wait_for_space(q, wq_space_until_wrap(q))) |
dd08ebf6 MB |
613 | return -ENODEV; |
614 | ||
c73acc1e | 615 | xe_assert(xe, FIELD_FIT(WQ_LEN_MASK, len_dw)); |
dd08ebf6 | 616 | |
9b9529ce | 617 | parallel_write(xe, map, wq[q->guc->wqi_tail / sizeof(u32)], |
dd08ebf6 MB |
618 | FIELD_PREP(WQ_TYPE_MASK, WQ_TYPE_NOOP) | |
619 | FIELD_PREP(WQ_LEN_MASK, len_dw)); | |
9b9529ce | 620 | q->guc->wqi_tail = 0; |
dd08ebf6 MB |
621 | |
622 | return 0; | |
623 | } | |
624 | ||
9b9529ce | 625 | static void wq_item_append(struct xe_exec_queue *q) |
dd08ebf6 | 626 | { |
9b9529ce | 627 | struct xe_guc *guc = exec_queue_to_guc(q); |
dd08ebf6 | 628 | struct xe_device *xe = guc_to_xe(guc); |
9b9529ce | 629 | struct iosys_map map = xe_lrc_parallel_map(q->lrc); |
e3828ebf MB |
630 | #define WQ_HEADER_SIZE 4 /* Includes 1 LRC address too */ |
631 | u32 wqi[XE_HW_ENGINE_MAX_INSTANCE + (WQ_HEADER_SIZE - 1)]; | |
9b9529ce | 632 | u32 wqi_size = (q->width + (WQ_HEADER_SIZE - 1)) * sizeof(u32); |
dd08ebf6 MB |
633 | u32 len_dw = (wqi_size / sizeof(u32)) - 1; |
634 | int i = 0, j; | |
635 | ||
9b9529ce FD |
636 | if (wqi_size > wq_space_until_wrap(q)) { |
637 | if (wq_noop_append(q)) | |
dd08ebf6 MB |
638 | return; |
639 | } | |
9b9529ce | 640 | if (wq_wait_for_space(q, wqi_size)) |
dd08ebf6 MB |
641 | return; |
642 | ||
643 | wqi[i++] = FIELD_PREP(WQ_TYPE_MASK, WQ_TYPE_MULTI_LRC) | | |
644 | FIELD_PREP(WQ_LEN_MASK, len_dw); | |
9b9529ce FD |
645 | wqi[i++] = xe_lrc_descriptor(q->lrc); |
646 | wqi[i++] = FIELD_PREP(WQ_GUC_ID_MASK, q->guc->id) | | |
647 | FIELD_PREP(WQ_RING_TAIL_MASK, q->lrc->ring.tail / sizeof(u64)); | |
dd08ebf6 | 648 | wqi[i++] = 0; |
9b9529ce FD |
649 | for (j = 1; j < q->width; ++j) { |
650 | struct xe_lrc *lrc = q->lrc + j; | |
dd08ebf6 MB |
651 | |
652 | wqi[i++] = lrc->ring.tail / sizeof(u64); | |
653 | } | |
654 | ||
c73acc1e | 655 | xe_assert(xe, i == wqi_size / sizeof(u32)); |
dd08ebf6 | 656 | |
1825c492 | 657 | iosys_map_incr(&map, offsetof(struct guc_submit_parallel_scratch, |
9b9529ce | 658 | wq[q->guc->wqi_tail / sizeof(u32)])); |
dd08ebf6 | 659 | xe_map_memcpy_to(xe, &map, 0, wqi, wqi_size); |
9b9529ce | 660 | q->guc->wqi_tail += wqi_size; |
c73acc1e | 661 | xe_assert(xe, q->guc->wqi_tail <= WQ_SIZE); |
dd08ebf6 MB |
662 | |
663 | xe_device_wmb(xe); | |
664 | ||
9b9529ce FD |
665 | map = xe_lrc_parallel_map(q->lrc); |
666 | parallel_write(xe, map, wq_desc.tail, q->guc->wqi_tail); | |
dd08ebf6 MB |
667 | } |
668 | ||
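/*
 * Work queue item layout emitted above: a header dword (WQ_TYPE_MULTI_LRC
 * plus len_dw, which excludes the header dword itself), lane 0's LRC
 * descriptor, a dword packing the guc_id with lane 0's ring tail in
 * qwords, one zero dword (WQ_HEADER_SIZE counts these four), then the ring
 * tail of each remaining lane. wq_noop_append() pads with a NOOP item
 * first when the item would wrap, so items never straddle the ring
 * boundary.
 */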
669 | #define RESUME_PENDING ~0x0ull | |
9b9529ce | 670 | static void submit_exec_queue(struct xe_exec_queue *q) |
dd08ebf6 | 671 | { |
9b9529ce | 672 | struct xe_guc *guc = exec_queue_to_guc(q); |
c73acc1e | 673 | struct xe_device *xe = guc_to_xe(guc); |
9b9529ce | 674 | struct xe_lrc *lrc = q->lrc; |
dd08ebf6 MB |
675 | u32 action[3]; |
676 | u32 g2h_len = 0; | |
677 | u32 num_g2h = 0; | |
678 | int len = 0; | |
679 | bool extra_submit = false; | |
680 | ||
c73acc1e | 681 | xe_assert(xe, exec_queue_registered(q)); |
dd08ebf6 | 682 | |
9b9529ce FD |
683 | if (xe_exec_queue_is_parallel(q)) |
684 | wq_item_append(q); | |
dd08ebf6 MB |
685 | else |
686 | xe_lrc_write_ctx_reg(lrc, CTX_RING_TAIL, lrc->ring.tail); | |
687 | ||
9b9529ce | 688 | if (exec_queue_suspended(q) && !xe_exec_queue_is_parallel(q)) |
dd08ebf6 MB |
689 | return; |
690 | ||
9b9529ce | 691 | if (!exec_queue_enabled(q) && !exec_queue_suspended(q)) { |
dd08ebf6 | 692 | action[len++] = XE_GUC_ACTION_SCHED_CONTEXT_MODE_SET; |
9b9529ce | 693 | action[len++] = q->guc->id; |
dd08ebf6 MB |
694 | action[len++] = GUC_CONTEXT_ENABLE; |
695 | g2h_len = G2H_LEN_DW_SCHED_CONTEXT_MODE_SET; | |
696 | num_g2h = 1; | |
9b9529ce | 697 | if (xe_exec_queue_is_parallel(q)) |
dd08ebf6 MB |
698 | extra_submit = true; |
699 | ||
9b9529ce FD |
700 | q->guc->resume_time = RESUME_PENDING; |
701 | set_exec_queue_pending_enable(q); | |
702 | set_exec_queue_enabled(q); | |
703 | trace_xe_exec_queue_scheduling_enable(q); | |
dd08ebf6 MB |
704 | } else { |
705 | action[len++] = XE_GUC_ACTION_SCHED_CONTEXT; | |
9b9529ce FD |
706 | action[len++] = q->guc->id; |
707 | trace_xe_exec_queue_submit(q); | |
dd08ebf6 MB |
708 | } |
709 | ||
710 | xe_guc_ct_send(&guc->ct, action, len, g2h_len, num_g2h); | |
711 | ||
712 | if (extra_submit) { | |
713 | len = 0; | |
714 | action[len++] = XE_GUC_ACTION_SCHED_CONTEXT; | |
9b9529ce FD |
715 | action[len++] = q->guc->id; |
716 | trace_xe_exec_queue_submit(q); | |
dd08ebf6 MB |
717 | |
718 | xe_guc_ct_send(&guc->ct, action, len, 0, 0); | |
719 | } | |
720 | } | |
721 | ||
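/*
 * Two submission flavours above: a queue that is not yet enabled (and not
 * suspended) gets a SCHED_CONTEXT_MODE_SET(ENABLE) with one G2H reply
 * reserved, while an already-enabled queue just gets a SCHED_CONTEXT kick.
 * Parallel queues being enabled also get that extra SCHED_CONTEXT
 * afterwards, presumably so the work queue item appended above is picked
 * up.
 */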
722 | static struct dma_fence * | |
9b9529ce | 723 | guc_exec_queue_run_job(struct drm_sched_job *drm_job) |
dd08ebf6 MB |
724 | { |
725 | struct xe_sched_job *job = to_xe_sched_job(drm_job); | |
9b9529ce | 726 | struct xe_exec_queue *q = job->q; |
c73acc1e FD |
727 | struct xe_guc *guc = exec_queue_to_guc(q); |
728 | struct xe_device *xe = guc_to_xe(guc); | |
9b9529ce | 729 | bool lr = xe_exec_queue_is_lr(q); |
dd08ebf6 | 730 | |
c73acc1e FD |
731 | xe_assert(xe, !(exec_queue_destroyed(q) || exec_queue_pending_disable(q)) || |
732 | exec_queue_banned(q) || exec_queue_suspended(q)); | |
dd08ebf6 MB |
733 | |
734 | trace_xe_sched_job_run(job); | |
735 | ||
9b9529ce FD |
736 | if (!exec_queue_killed_or_banned(q) && !xe_sched_job_is_error(job)) { |
737 | if (!exec_queue_registered(q)) | |
738 | register_engine(q); | |
8ae8a2e8 | 739 | if (!lr) /* LR jobs are emitted in the exec IOCTL */ |
9b9529ce FD |
740 | q->ring_ops->emit_job(job); |
741 | submit_exec_queue(q); | |
dd08ebf6 MB |
742 | } |
743 | ||
8ae8a2e8 MB |
744 | if (lr) { |
745 | xe_sched_job_set_error(job, -EOPNOTSUPP); | |
746 | return NULL; | |
747 | } else if (test_and_set_bit(JOB_FLAG_SUBMIT, &job->fence->flags)) { | |
dd08ebf6 | 748 | return job->fence; |
8ae8a2e8 | 749 | } else { |
dd08ebf6 | 750 | return dma_fence_get(job->fence); |
8ae8a2e8 | 751 | } |
dd08ebf6 MB |
752 | } |
753 | ||
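/*
 * Fence reference rules in run_job() above: the first submission of a job
 * sets JOB_FLAG_SUBMIT and returns an extra reference to job->fence, while
 * a resubmission (flag already set) returns the existing reference. LR
 * jobs never return a fence at all, since their ring instructions are
 * emitted in the exec IOCTL; they get -EOPNOTSUPP and NULL instead.
 */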
9b9529ce | 754 | static void guc_exec_queue_free_job(struct drm_sched_job *drm_job) |
dd08ebf6 MB |
755 | { |
756 | struct xe_sched_job *job = to_xe_sched_job(drm_job); | |
757 | ||
758 | trace_xe_sched_job_free(job); | |
759 | xe_sched_job_put(job); | |
760 | } | |
761 | ||
762 | static int guc_read_stopped(struct xe_guc *guc) | |
763 | { | |
764 | return atomic_read(&guc->submission_state.stopped); | |
765 | } | |
766 | ||
9b9529ce | 767 | #define MAKE_SCHED_CONTEXT_ACTION(q, enable_disable) \ |
dd08ebf6 MB |
768 | u32 action[] = { \ |
769 | XE_GUC_ACTION_SCHED_CONTEXT_MODE_SET, \ | |
9b9529ce | 770 | q->guc->id, \ |
dd08ebf6 MB |
771 | GUC_CONTEXT_##enable_disable, \ |
772 | } | |
773 | ||
774 | static void disable_scheduling_deregister(struct xe_guc *guc, | |
9b9529ce | 775 | struct xe_exec_queue *q) |
dd08ebf6 | 776 | { |
9b9529ce | 777 | MAKE_SCHED_CONTEXT_ACTION(q, DISABLE); |
5c0553cd | 778 | struct xe_device *xe = guc_to_xe(guc); |
dd08ebf6 MB |
779 | int ret; |
780 | ||
9b9529ce | 781 | set_min_preemption_timeout(guc, q); |
dd08ebf6 | 782 | smp_rmb(); |
9b9529ce | 783 | ret = wait_event_timeout(guc->ct.wq, !exec_queue_pending_enable(q) || |
dd08ebf6 MB |
784 | guc_read_stopped(guc), HZ * 5); |
785 | if (!ret) { | |
9b9529ce | 786 | struct xe_gpu_scheduler *sched = &q->guc->sched; |
dd08ebf6 | 787 | |
5c0553cd | 788 | drm_warn(&xe->drm, "Pending enable failed to respond"); |
dd08ebf6 | 789 | xe_sched_submission_start(sched); |
9b9529ce | 790 | xe_gt_reset_async(q->gt); |
dd08ebf6 MB |
791 | xe_sched_tdr_queue_imm(sched); |
792 | return; | |
793 | } | |
794 | ||
9b9529ce FD |
795 | clear_exec_queue_enabled(q); |
796 | set_exec_queue_pending_disable(q); | |
797 | set_exec_queue_destroyed(q); | |
798 | trace_xe_exec_queue_scheduling_disable(q); | |
dd08ebf6 MB |
799 | |
800 | /* | |
801 | * Reserve space for both G2H here as the 2nd G2H is sent from a G2H | |
802 | * handler and we are not allowed to reserve G2H space in handlers. |
803 | */ | |
804 | xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action), | |
805 | G2H_LEN_DW_SCHED_CONTEXT_MODE_SET + | |
806 | G2H_LEN_DW_DEREGISTER_CONTEXT, 2); | |
807 | } | |
808 | ||
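/*
 * Sequence above: drop the preemption timeout to its minimum so the
 * context is preempted almost immediately, wait out any in-flight pending
 * enable, then mark the queue disabled/pending-disable/destroyed and send
 * the DISABLE. Credits for both expected G2H replies (scheduling-disable
 * done and deregister done) are reserved in one go because the deregister
 * is issued from the G2H handler itself.
 */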
9b9529ce | 809 | static void guc_exec_queue_print(struct xe_exec_queue *q, struct drm_printer *p); |
dd08ebf6 MB |
810 | |
811 | #if IS_ENABLED(CONFIG_DRM_XE_SIMPLE_ERROR_CAPTURE) | |
9b9529ce | 812 | static void simple_error_capture(struct xe_exec_queue *q) |
dd08ebf6 | 813 | { |
9b9529ce | 814 | struct xe_guc *guc = exec_queue_to_guc(q); |
98459fb5 JN |
815 | struct xe_device *xe = guc_to_xe(guc); |
816 | struct drm_printer p = drm_err_printer(&xe->drm, NULL); | |
dd08ebf6 MB |
817 | struct xe_hw_engine *hwe; |
818 | enum xe_hw_engine_id id; | |
9b9529ce FD |
819 | u32 adj_logical_mask = q->logical_mask; |
820 | u32 width_mask = (0x1 << q->width) - 1; | |
dd08ebf6 MB |
821 | int i; |
822 | bool cookie; | |
823 | ||
9b9529ce FD |
824 | if (q->vm && !q->vm->error_capture.capture_once) { |
825 | q->vm->error_capture.capture_once = true; | |
dd08ebf6 | 826 | cookie = dma_fence_begin_signalling(); |
9b9529ce | 827 | for (i = 0; q->width > 1 && i < XE_HW_ENGINE_MAX_INSTANCE;) { |
dd08ebf6 MB |
828 | if (adj_logical_mask & BIT(i)) { |
829 | adj_logical_mask |= width_mask << i; | |
9b9529ce | 830 | i += q->width; |
dd08ebf6 MB |
831 | } else { |
832 | ++i; | |
833 | } | |
834 | } | |
835 | ||
836 | xe_force_wake_get(gt_to_fw(guc_to_gt(guc)), XE_FORCEWAKE_ALL); | |
513260df | 837 | xe_guc_ct_print(&guc->ct, &p, true); |
9b9529ce | 838 | guc_exec_queue_print(q, &p); |
dd08ebf6 | 839 | for_each_hw_engine(hwe, guc_to_gt(guc), id) { |
9b9529ce | 840 | if (hwe->class != q->hwe->class || |
dd08ebf6 MB |
841 | !(BIT(hwe->logical_instance) & adj_logical_mask)) |
842 | continue; | |
a4db5555 | 843 | xe_hw_engine_print(hwe, &p); |
dd08ebf6 | 844 | } |
9b9529ce | 845 | xe_analyze_vm(&p, q->vm, q->gt->info.id); |
dd08ebf6 MB |
846 | xe_force_wake_put(gt_to_fw(guc_to_gt(guc)), XE_FORCEWAKE_ALL); |
847 | dma_fence_end_signalling(cookie); | |
848 | } | |
849 | } | |
850 | #else | |
9b9529ce | 851 | static void simple_error_capture(struct xe_exec_queue *q) |
dd08ebf6 MB |
852 | { |
853 | } | |
854 | #endif | |
855 | ||
9b9529ce | 856 | static void xe_guc_exec_queue_trigger_cleanup(struct xe_exec_queue *q) |
8ae8a2e8 | 857 | { |
9b9529ce | 858 | struct xe_guc *guc = exec_queue_to_guc(q); |
e670f0b4 BK |
859 | struct xe_device *xe = guc_to_xe(guc); |
860 | ||
861 | /* Wake up the xe_wait_user_fence ioctl if the exec queue is reset */ |
862 | wake_up_all(&xe->ufence_wq); | |
8ae8a2e8 | 863 | |
9b9529ce FD |
864 | if (xe_exec_queue_is_lr(q)) |
865 | queue_work(guc_to_gt(guc)->ordered_wq, &q->guc->lr_tdr); | |
8ae8a2e8 | 866 | else |
9b9529ce | 867 | xe_sched_tdr_queue_imm(&q->guc->sched); |
8ae8a2e8 MB |
868 | } |
869 | ||
9b9529ce | 870 | static void xe_guc_exec_queue_lr_cleanup(struct work_struct *w) |
8ae8a2e8 | 871 | { |
9b9529ce FD |
872 | struct xe_guc_exec_queue *ge = |
873 | container_of(w, struct xe_guc_exec_queue, lr_tdr); | |
874 | struct xe_exec_queue *q = ge->q; | |
5c0553cd FD |
875 | struct xe_guc *guc = exec_queue_to_guc(q); |
876 | struct xe_device *xe = guc_to_xe(guc); | |
8ae8a2e8 MB |
877 | struct xe_gpu_scheduler *sched = &ge->sched; |
878 | ||
c73acc1e | 879 | xe_assert(xe, xe_exec_queue_is_lr(q)); |
9b9529ce | 880 | trace_xe_exec_queue_lr_cleanup(q); |
8ae8a2e8 MB |
881 | |
882 | /* Kill the run_job / process_msg entry points */ | |
883 | xe_sched_submission_stop(sched); | |
884 | ||
31b57683 MA |
885 | /* |
886 | * Engine state now mostly stable, disable scheduling / deregister if | |
887 | * needed. This cleanup routine might be called multiple times, where | |
888 | * the actual async engine deregister drops the final engine ref. | |
889 | * Calling disable_scheduling_deregister will mark the engine as | |
890 | * destroyed and fire off the CT requests to disable scheduling / | |
891 | * deregister, which we only want to do once. We also don't want to mark | |
892 | * the engine as pending_disable again as this may race with the | |
893 | * xe_guc_deregister_done_handler() which treats it as an unexpected | |
894 | * state. | |
895 | */ | |
896 | if (exec_queue_registered(q) && !exec_queue_destroyed(q)) { | |
9b9529ce | 897 | struct xe_guc *guc = exec_queue_to_guc(q); |
8ae8a2e8 MB |
898 | int ret; |
899 | ||
9b9529ce FD |
900 | set_exec_queue_banned(q); |
901 | disable_scheduling_deregister(guc, q); | |
8ae8a2e8 MB |
902 | |
903 | /* | |
904 | * Must wait for scheduling to be disabled before signalling | |
905 | * any fences; if the GT is broken the GT reset code should signal us. |
906 | */ | |
907 | ret = wait_event_timeout(guc->ct.wq, | |
9b9529ce | 908 | !exec_queue_pending_disable(q) || |
8ae8a2e8 MB |
909 | guc_read_stopped(guc), HZ * 5); |
910 | if (!ret) { | |
5c0553cd | 911 | drm_warn(&xe->drm, "Schedule disable failed to respond"); |
8ae8a2e8 | 912 | xe_sched_submission_start(sched); |
9b9529ce | 913 | xe_gt_reset_async(q->gt); |
8ae8a2e8 MB |
914 | return; |
915 | } | |
916 | } | |
917 | ||
918 | xe_sched_submission_start(sched); | |
919 | } | |
920 | ||
dd08ebf6 | 921 | static enum drm_gpu_sched_stat |
9b9529ce | 922 | guc_exec_queue_timedout_job(struct drm_sched_job *drm_job) |
dd08ebf6 MB |
923 | { |
924 | struct xe_sched_job *job = to_xe_sched_job(drm_job); | |
925 | struct xe_sched_job *tmp_job; | |
9b9529ce FD |
926 | struct xe_exec_queue *q = job->q; |
927 | struct xe_gpu_scheduler *sched = &q->guc->sched; | |
928 | struct xe_device *xe = guc_to_xe(exec_queue_to_guc(q)); | |
dd08ebf6 MB |
929 | int err = -ETIME; |
930 | int i = 0; | |
931 | ||
932 | if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &job->fence->flags)) { | |
dd08ebf6 | 933 | drm_notice(&xe->drm, "Timedout job: seqno=%u, guc_id=%d, flags=0x%lx", |
9b9529ce | 934 | xe_sched_job_seqno(job), q->guc->id, q->flags); |
996da37f MR |
935 | xe_gt_WARN(q->gt, q->flags & EXEC_QUEUE_FLAG_KERNEL, |
936 | "Kernel-submitted job timed out\n"); | |
937 | xe_gt_WARN(q->gt, q->flags & EXEC_QUEUE_FLAG_VM && !exec_queue_killed(q), | |
938 | "VM job timed out on non-killed execqueue\n"); | |
939 | ||
9b9529ce | 940 | simple_error_capture(q); |
98fefec8 | 941 | xe_devcoredump(job); |
dd08ebf6 MB |
942 | } else { |
943 | drm_dbg(&xe->drm, "Timedout signaled job: seqno=%u, guc_id=%d, flags=0x%lx", | |
9b9529ce | 944 | xe_sched_job_seqno(job), q->guc->id, q->flags); |
dd08ebf6 MB |
945 | } |
946 | trace_xe_sched_job_timedout(job); | |
947 | ||
948 | /* Kill the run_job entry point */ | |
949 | xe_sched_submission_stop(sched); | |
950 | ||
951 | /* | |
952 | * Kernel jobs should never fail, nor should VM jobs; if they do, |
953 | * something has gone wrong and the GT needs a reset. |
954 | */ | |
9b9529ce FD |
955 | if (q->flags & EXEC_QUEUE_FLAG_KERNEL || |
956 | (q->flags & EXEC_QUEUE_FLAG_VM && !exec_queue_killed(q))) { | |
dd08ebf6 MB |
957 | if (!xe_sched_invalidate_job(job, 2)) { |
958 | xe_sched_add_pending_job(sched, job); | |
959 | xe_sched_submission_start(sched); | |
9b9529ce | 960 | xe_gt_reset_async(q->gt); |
dd08ebf6 MB |
961 | goto out; |
962 | } | |
963 | } | |
964 | ||
965 | /* Engine state now stable, disable scheduling if needed */ | |
ef6ea972 | 966 | if (exec_queue_registered(q)) { |
9b9529ce | 967 | struct xe_guc *guc = exec_queue_to_guc(q); |
dd08ebf6 MB |
968 | int ret; |
969 | ||
9b9529ce | 970 | if (exec_queue_reset(q)) |
dd08ebf6 | 971 | err = -EIO; |
9b9529ce | 972 | set_exec_queue_banned(q); |
ef6ea972 MA |
973 | if (!exec_queue_destroyed(q)) { |
974 | xe_exec_queue_get(q); | |
975 | disable_scheduling_deregister(guc, q); | |
976 | } | |
dd08ebf6 MB |
977 | |
978 | /* | |
979 | * Must wait for scheduling to be disabled before signalling | |
980 | * any fences; if the GT is broken the GT reset code should signal us. |
981 | * | |
982 | * FIXME: Tests can generate a ton of 0x6000 (IOMMU CAT fault | |
983 | * error) messages which can cause the schedule disable to get | |
984 | * lost. If this occurs, trigger a GT reset to recover. | |
985 | */ | |
986 | smp_rmb(); | |
987 | ret = wait_event_timeout(guc->ct.wq, | |
9b9529ce | 988 | !exec_queue_pending_disable(q) || |
dd08ebf6 | 989 | guc_read_stopped(guc), HZ * 5); |
ef6ea972 | 990 | if (!ret || guc_read_stopped(guc)) { |
5c0553cd | 991 | drm_warn(&xe->drm, "Schedule disable failed to respond"); |
dd08ebf6 MB |
992 | xe_sched_add_pending_job(sched, job); |
993 | xe_sched_submission_start(sched); | |
9b9529ce | 994 | xe_gt_reset_async(q->gt); |
dd08ebf6 MB |
995 | xe_sched_tdr_queue_imm(sched); |
996 | goto out; | |
997 | } | |
998 | } | |
999 | ||
1000 | /* Stop fence signaling */ | |
9b9529ce | 1001 | xe_hw_fence_irq_stop(q->fence_irq); |
dd08ebf6 MB |
1002 | |
1003 | /* | |
1004 | * Fence state now stable, stop / start scheduler which cleans up any | |
1005 | * fences that are complete | |
1006 | */ | |
1007 | xe_sched_add_pending_job(sched, job); | |
1008 | xe_sched_submission_start(sched); | |
9b9529ce | 1009 | xe_guc_exec_queue_trigger_cleanup(q); |
dd08ebf6 MB |
1010 | |
1011 | /* Mark all outstanding jobs as bad, thus completing them */ | |
1012 | spin_lock(&sched->base.job_list_lock); | |
1013 | list_for_each_entry(tmp_job, &sched->base.pending_list, drm.list) | |
1014 | xe_sched_job_set_error(tmp_job, !i++ ? err : -ECANCELED); | |
1015 | spin_unlock(&sched->base.job_list_lock); | |
1016 | ||
1017 | /* Start fence signaling */ | |
9b9529ce | 1018 | xe_hw_fence_irq_start(q->fence_irq); |
dd08ebf6 MB |
1019 | |
1020 | out: | |
1021 | return DRM_GPU_SCHED_STAT_NOMINAL; | |
1022 | } | |
1023 | ||
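/*
 * TDR flow above, in short: capture debug state, ban the queue, disable
 * scheduling with the GuC and wait for the ack, then, with fence
 * signalling stopped, re-add the timed-out job as pending, restart the
 * scheduler so completed fences are reaped, and mark all outstanding jobs
 * bad (-ETIME for the first, or -EIO after a queue reset; -ECANCELED for
 * the rest). Kernel and VM-bound queues instead escalate straight to a GT
 * reset unless the job has already survived two of them.
 */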
9b9529ce | 1024 | static void __guc_exec_queue_fini_async(struct work_struct *w) |
dd08ebf6 | 1025 | { |
9b9529ce FD |
1026 | struct xe_guc_exec_queue *ge = |
1027 | container_of(w, struct xe_guc_exec_queue, fini_async); | |
1028 | struct xe_exec_queue *q = ge->q; | |
1029 | struct xe_guc *guc = exec_queue_to_guc(q); | |
dd08ebf6 | 1030 | |
9b9529ce | 1031 | trace_xe_exec_queue_destroy(q); |
dd08ebf6 | 1032 | |
9b9529ce | 1033 | if (xe_exec_queue_is_lr(q)) |
8ae8a2e8 | 1034 | cancel_work_sync(&ge->lr_tdr); |
9b9529ce | 1035 | release_guc_id(guc, q); |
dd08ebf6 MB |
1036 | xe_sched_entity_fini(&ge->entity); |
1037 | xe_sched_fini(&ge->sched); | |
1038 | ||
a20c75db MB |
1039 | kfree(ge); |
1040 | xe_exec_queue_fini(q); | |
dd08ebf6 MB |
1041 | } |
1042 | ||
9b9529ce | 1043 | static void guc_exec_queue_fini_async(struct xe_exec_queue *q) |
dd08ebf6 | 1044 | { |
9b9529ce | 1045 | INIT_WORK(&q->guc->fini_async, __guc_exec_queue_fini_async); |
dd08ebf6 MB |
1046 | |
1047 | /* We must block on kernel engines so slabs are empty on driver unload */ | |
923e4238 | 1048 | if (q->flags & EXEC_QUEUE_FLAG_PERMANENT) |
a20c75db MB |
1049 | __guc_exec_queue_fini_async(&q->guc->fini_async); |
1050 | else | |
1051 | queue_work(system_wq, &q->guc->fini_async); | |
dd08ebf6 MB |
1052 | } |
1053 | ||
9b9529ce | 1054 | static void __guc_exec_queue_fini(struct xe_guc *guc, struct xe_exec_queue *q) |
dd08ebf6 MB |
1055 | { |
1056 | /* | |
1057 | * Might be done from within the GPU scheduler, need to do async as we | |
1058 | * fini the scheduler when the engine is fini'd, the scheduler can't | |
1059 | * complete fini within itself (circular dependency). Async resolves | |
1060 | * this and we don't really care when everything is fini'd, just that it |
1061 | * is. | |
1062 | */ | |
9b9529ce | 1063 | guc_exec_queue_fini_async(q); |
dd08ebf6 MB |
1064 | } |
1065 | ||
9b9529ce | 1066 | static void __guc_exec_queue_process_msg_cleanup(struct xe_sched_msg *msg) |
dd08ebf6 | 1067 | { |
9b9529ce FD |
1068 | struct xe_exec_queue *q = msg->private_data; |
1069 | struct xe_guc *guc = exec_queue_to_guc(q); | |
c73acc1e | 1070 | struct xe_device *xe = guc_to_xe(guc); |
dd08ebf6 | 1071 | |
c73acc1e | 1072 | xe_assert(xe, !(q->flags & EXEC_QUEUE_FLAG_PERMANENT)); |
9b9529ce | 1073 | trace_xe_exec_queue_cleanup_entity(q); |
dd08ebf6 | 1074 | |
9b9529ce FD |
1075 | if (exec_queue_registered(q)) |
1076 | disable_scheduling_deregister(guc, q); | |
dd08ebf6 | 1077 | else |
9b9529ce | 1078 | __guc_exec_queue_fini(guc, q); |
dd08ebf6 MB |
1079 | } |
1080 | ||
9b9529ce | 1081 | static bool guc_exec_queue_allowed_to_change_state(struct xe_exec_queue *q) |
dd08ebf6 | 1082 | { |
9b9529ce | 1083 | return !exec_queue_killed_or_banned(q) && exec_queue_registered(q); |
dd08ebf6 MB |
1084 | } |
1085 | ||
9b9529ce | 1086 | static void __guc_exec_queue_process_msg_set_sched_props(struct xe_sched_msg *msg) |
dd08ebf6 | 1087 | { |
9b9529ce FD |
1088 | struct xe_exec_queue *q = msg->private_data; |
1089 | struct xe_guc *guc = exec_queue_to_guc(q); | |
dd08ebf6 | 1090 | |
9b9529ce FD |
1091 | if (guc_exec_queue_allowed_to_change_state(q)) |
1092 | init_policies(guc, q); | |
dd08ebf6 MB |
1093 | kfree(msg); |
1094 | } | |
1095 | ||
9b9529ce | 1096 | static void suspend_fence_signal(struct xe_exec_queue *q) |
dd08ebf6 | 1097 | { |
9b9529ce | 1098 | struct xe_guc *guc = exec_queue_to_guc(q); |
c73acc1e | 1099 | struct xe_device *xe = guc_to_xe(guc); |
dd08ebf6 | 1100 | |
c73acc1e FD |
1101 | xe_assert(xe, exec_queue_suspended(q) || exec_queue_killed(q) || |
1102 | guc_read_stopped(guc)); | |
1103 | xe_assert(xe, q->guc->suspend_pending); | |
dd08ebf6 | 1104 | |
9b9529ce | 1105 | q->guc->suspend_pending = false; |
dd08ebf6 | 1106 | smp_wmb(); |
9b9529ce | 1107 | wake_up(&q->guc->suspend_wait); |
dd08ebf6 MB |
1108 | } |
1109 | ||
9b9529ce | 1110 | static void __guc_exec_queue_process_msg_suspend(struct xe_sched_msg *msg) |
dd08ebf6 | 1111 | { |
9b9529ce FD |
1112 | struct xe_exec_queue *q = msg->private_data; |
1113 | struct xe_guc *guc = exec_queue_to_guc(q); | |
dd08ebf6 | 1114 | |
9b9529ce FD |
1115 | if (guc_exec_queue_allowed_to_change_state(q) && !exec_queue_suspended(q) && |
1116 | exec_queue_enabled(q)) { | |
1117 | wait_event(guc->ct.wq, q->guc->resume_time != RESUME_PENDING || | |
dd08ebf6 MB |
1118 | guc_read_stopped(guc)); |
1119 | ||
1120 | if (!guc_read_stopped(guc)) { | |
9b9529ce | 1121 | MAKE_SCHED_CONTEXT_ACTION(q, DISABLE); |
dd08ebf6 MB |
1122 | s64 since_resume_ms = |
1123 | ktime_ms_delta(ktime_get(), | |
9b9529ce FD |
1124 | q->guc->resume_time); |
1125 | s64 wait_ms = q->vm->preempt.min_run_period_ms - | |
dd08ebf6 MB |
1126 | since_resume_ms; |
1127 | ||
9b9529ce | 1128 | if (wait_ms > 0 && q->guc->resume_time) |
dd08ebf6 MB |
1129 | msleep(wait_ms); |
1130 | ||
9b9529ce FD |
1131 | set_exec_queue_suspended(q); |
1132 | clear_exec_queue_enabled(q); | |
1133 | set_exec_queue_pending_disable(q); | |
1134 | trace_xe_exec_queue_scheduling_disable(q); | |
dd08ebf6 MB |
1135 | |
1136 | xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action), | |
1137 | G2H_LEN_DW_SCHED_CONTEXT_MODE_SET, 1); | |
1138 | } | |
9b9529ce FD |
1139 | } else if (q->guc->suspend_pending) { |
1140 | set_exec_queue_suspended(q); | |
1141 | suspend_fence_signal(q); | |
dd08ebf6 MB |
1142 | } |
1143 | } | |
1144 | ||
9b9529ce | 1145 | static void __guc_exec_queue_process_msg_resume(struct xe_sched_msg *msg) |
dd08ebf6 | 1146 | { |
9b9529ce FD |
1147 | struct xe_exec_queue *q = msg->private_data; |
1148 | struct xe_guc *guc = exec_queue_to_guc(q); | |
dd08ebf6 | 1149 | |
9b9529ce FD |
1150 | if (guc_exec_queue_allowed_to_change_state(q)) { |
1151 | MAKE_SCHED_CONTEXT_ACTION(q, ENABLE); | |
dd08ebf6 | 1152 | |
9b9529ce FD |
1153 | q->guc->resume_time = RESUME_PENDING; |
1154 | clear_exec_queue_suspended(q); | |
1155 | set_exec_queue_pending_enable(q); | |
1156 | set_exec_queue_enabled(q); | |
1157 | trace_xe_exec_queue_scheduling_enable(q); | |
dd08ebf6 MB |
1158 | |
1159 | xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action), | |
1160 | G2H_LEN_DW_SCHED_CONTEXT_MODE_SET, 1); | |
1161 | } else { | |
9b9529ce | 1162 | clear_exec_queue_suspended(q); |
dd08ebf6 MB |
1163 | } |
1164 | } | |
1165 | ||
1166 | #define CLEANUP 1 /* Non-zero values to catch uninitialized msg */ | |
1167 | #define SET_SCHED_PROPS 2 | |
1168 | #define SUSPEND 3 | |
1169 | #define RESUME 4 | |
1170 | ||
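/*
 * These four opcodes travel through the scheduler's message queue
 * (guc_exec_queue_add_msg() -> guc_exec_queue_process_msg()), so state
 * changes are serialized with run_job() on the same ordered workqueue
 * rather than racing with submissions.
 */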
9b9529ce | 1171 | static void guc_exec_queue_process_msg(struct xe_sched_msg *msg) |
dd08ebf6 MB |
1172 | { |
1173 | trace_xe_sched_msg_recv(msg); | |
1174 | ||
1175 | switch (msg->opcode) { | |
1176 | case CLEANUP: | |
9b9529ce | 1177 | __guc_exec_queue_process_msg_cleanup(msg); |
dd08ebf6 MB |
1178 | break; |
1179 | case SET_SCHED_PROPS: | |
9b9529ce | 1180 | __guc_exec_queue_process_msg_set_sched_props(msg); |
dd08ebf6 MB |
1181 | break; |
1182 | case SUSPEND: | |
9b9529ce | 1183 | __guc_exec_queue_process_msg_suspend(msg); |
dd08ebf6 MB |
1184 | break; |
1185 | case RESUME: | |
9b9529ce | 1186 | __guc_exec_queue_process_msg_resume(msg); |
dd08ebf6 MB |
1187 | break; |
1188 | default: | |
99fea682 | 1189 | XE_WARN_ON("Unknown message type"); |
dd08ebf6 MB |
1190 | } |
1191 | } | |
1192 | ||
1193 | static const struct drm_sched_backend_ops drm_sched_ops = { | |
9b9529ce FD |
1194 | .run_job = guc_exec_queue_run_job, |
1195 | .free_job = guc_exec_queue_free_job, | |
1196 | .timedout_job = guc_exec_queue_timedout_job, | |
dd08ebf6 MB |
1197 | }; |
1198 | ||
1199 | static const struct xe_sched_backend_ops xe_sched_ops = { | |
9b9529ce | 1200 | .process_msg = guc_exec_queue_process_msg, |
dd08ebf6 MB |
1201 | }; |
1202 | ||
9b9529ce | 1203 | static int guc_exec_queue_init(struct xe_exec_queue *q) |
dd08ebf6 MB |
1204 | { |
1205 | struct xe_gpu_scheduler *sched; | |
9b9529ce | 1206 | struct xe_guc *guc = exec_queue_to_guc(q); |
c73acc1e | 1207 | struct xe_device *xe = guc_to_xe(guc); |
9b9529ce | 1208 | struct xe_guc_exec_queue *ge; |
dd08ebf6 MB |
1209 | long timeout; |
1210 | int err; | |
1211 | ||
c4991ee0 | 1212 | xe_assert(xe, xe_device_uc_enabled(guc_to_xe(guc))); |
dd08ebf6 MB |
1213 | |
1214 | ge = kzalloc(sizeof(*ge), GFP_KERNEL); | |
1215 | if (!ge) | |
1216 | return -ENOMEM; | |
1217 | ||
9b9529ce FD |
1218 | q->guc = ge; |
1219 | ge->q = q; | |
dd08ebf6 MB |
1220 | init_waitqueue_head(&ge->suspend_wait); |
1221 | ||
fdb6a053 | 1222 | timeout = (q->vm && xe_vm_in_lr_mode(q->vm)) ? MAX_SCHEDULE_TIMEOUT : |
9c125636 | 1223 | msecs_to_jiffies(q->sched_props.job_timeout_ms); |
a839e365 MB |
1224 | err = xe_sched_init(&ge->sched, &drm_sched_ops, &xe_sched_ops, |
1225 | get_submit_wq(guc), | |
1226 | q->lrc[0].ring.size / MAX_JOB_SIZE_BYTES, 64, | |
1227 | timeout, guc_to_gt(guc)->ordered_wq, NULL, | |
1228 | q->name, gt_to_xe(q->gt)->drm.dev); | |
dd08ebf6 MB |
1229 | if (err) |
1230 | goto err_free; | |
1231 | ||
1232 | sched = &ge->sched; | |
1233 | err = xe_sched_entity_init(&ge->entity, sched); | |
1234 | if (err) | |
1235 | goto err_sched; | |
dd08ebf6 | 1236 | |
9b9529ce FD |
1237 | if (xe_exec_queue_is_lr(q)) |
1238 | INIT_WORK(&q->guc->lr_tdr, xe_guc_exec_queue_lr_cleanup); | |
8ae8a2e8 | 1239 | |
dd08ebf6 MB |
1240 | mutex_lock(&guc->submission_state.lock); |
1241 | ||
9b9529ce | 1242 | err = alloc_guc_id(guc, q); |
dd08ebf6 MB |
1243 | if (err) |
1244 | goto err_entity; | |
1245 | ||
9b9529ce | 1246 | q->entity = &ge->entity; |
dd08ebf6 MB |
1247 | |
1248 | if (guc_read_stopped(guc)) | |
1249 | xe_sched_stop(sched); | |
1250 | ||
1251 | mutex_unlock(&guc->submission_state.lock); | |
1252 | ||
0b1d1473 | 1253 | xe_exec_queue_assign_name(q, q->guc->id); |
dd08ebf6 | 1254 | |
9b9529ce | 1255 | trace_xe_exec_queue_create(q); |
dd08ebf6 MB |
1256 | |
1257 | return 0; | |
1258 | ||
1259 | err_entity: | |
1260 | xe_sched_entity_fini(&ge->entity); | |
1261 | err_sched: | |
1262 | xe_sched_fini(&ge->sched); | |
1263 | err_free: | |
1264 | kfree(ge); | |
1265 | ||
1266 | return err; | |
1267 | } | |
1268 | ||
9b9529ce | 1269 | static void guc_exec_queue_kill(struct xe_exec_queue *q) |
dd08ebf6 | 1270 | { |
9b9529ce FD |
1271 | trace_xe_exec_queue_kill(q); |
1272 | set_exec_queue_killed(q); | |
1273 | xe_guc_exec_queue_trigger_cleanup(q); | |
dd08ebf6 MB |
1274 | } |
1275 | ||
9b9529ce FD |
1276 | static void guc_exec_queue_add_msg(struct xe_exec_queue *q, struct xe_sched_msg *msg, |
1277 | u32 opcode) | |
dd08ebf6 MB |
1278 | { |
1279 | INIT_LIST_HEAD(&msg->link); | |
1280 | msg->opcode = opcode; | |
9b9529ce | 1281 | msg->private_data = q; |
dd08ebf6 MB |
1282 | |
1283 | trace_xe_sched_msg_add(msg); | |
9b9529ce | 1284 | xe_sched_add_msg(&q->guc->sched, msg); |
dd08ebf6 MB |
1285 | } |
1286 | ||
1287 | #define STATIC_MSG_CLEANUP 0 | |
1288 | #define STATIC_MSG_SUSPEND 1 | |
1289 | #define STATIC_MSG_RESUME 2 | |
9b9529ce | 1290 | static void guc_exec_queue_fini(struct xe_exec_queue *q) |
dd08ebf6 | 1291 | { |
9b9529ce | 1292 | struct xe_sched_msg *msg = q->guc->static_msgs + STATIC_MSG_CLEANUP; |
dd08ebf6 | 1293 | |
923e4238 | 1294 | if (!(q->flags & EXEC_QUEUE_FLAG_PERMANENT)) |
9b9529ce | 1295 | guc_exec_queue_add_msg(q, msg, CLEANUP); |
dd08ebf6 | 1296 | else |
9b9529ce | 1297 | __guc_exec_queue_fini(exec_queue_to_guc(q), q); |
dd08ebf6 MB |
1298 | } |
1299 | ||
9b9529ce FD |
1300 | static int guc_exec_queue_set_priority(struct xe_exec_queue *q, |
1301 | enum xe_exec_queue_priority priority) | |
dd08ebf6 MB |
1302 | { |
1303 | struct xe_sched_msg *msg; | |
1304 | ||
a8004af3 | 1305 | if (q->sched_props.priority == priority || exec_queue_killed_or_banned(q)) |
dd08ebf6 MB |
1306 | return 0; |
1307 | ||
1308 | msg = kmalloc(sizeof(*msg), GFP_KERNEL); | |
1309 | if (!msg) | |
1310 | return -ENOMEM; | |
1311 | ||
a8004af3 | 1312 | q->sched_props.priority = priority; |
b16483f9 | 1313 | guc_exec_queue_add_msg(q, msg, SET_SCHED_PROPS); |
dd08ebf6 MB |
1314 | |
1315 | return 0; | |
1316 | } | |
1317 | ||
9b9529ce | 1318 | static int guc_exec_queue_set_timeslice(struct xe_exec_queue *q, u32 timeslice_us) |
dd08ebf6 MB |
1319 | { |
1320 | struct xe_sched_msg *msg; | |
1321 | ||
9b9529ce FD |
1322 | if (q->sched_props.timeslice_us == timeslice_us || |
1323 | exec_queue_killed_or_banned(q)) | |
dd08ebf6 MB |
1324 | return 0; |
1325 | ||
1326 | msg = kmalloc(sizeof(*msg), GFP_KERNEL); | |
1327 | if (!msg) | |
1328 | return -ENOMEM; | |
1329 | ||
9b9529ce FD |
1330 | q->sched_props.timeslice_us = timeslice_us; |
1331 | guc_exec_queue_add_msg(q, msg, SET_SCHED_PROPS); | |
dd08ebf6 MB |
1332 | |
1333 | return 0; | |
1334 | } | |
1335 | ||
9b9529ce FD |
1336 | static int guc_exec_queue_set_preempt_timeout(struct xe_exec_queue *q, |
1337 | u32 preempt_timeout_us) | |
dd08ebf6 MB |
1338 | { |
1339 | struct xe_sched_msg *msg; | |
1340 | ||
9b9529ce FD |
1341 | if (q->sched_props.preempt_timeout_us == preempt_timeout_us || |
1342 | exec_queue_killed_or_banned(q)) | |
dd08ebf6 MB |
1343 | return 0; |
1344 | ||
1345 | msg = kmalloc(sizeof(*msg), GFP_KERNEL); | |
1346 | if (!msg) | |
1347 | return -ENOMEM; | |
1348 | ||
9b9529ce FD |
1349 | q->sched_props.preempt_timeout_us = preempt_timeout_us; |
1350 | guc_exec_queue_add_msg(q, msg, SET_SCHED_PROPS); | |
dd08ebf6 MB |
1351 | |
1352 | return 0; | |
1353 | } | |
1354 | ||
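/*
 * The three setters above share a pattern: update the cached value in
 * q->sched_props immediately, then queue a SET_SCHED_PROPS message whose
 * handler re-sends all policies via init_policies(). A killed or banned
 * queue, or one that already has the requested value, is skipped with
 * success.
 */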
9b9529ce | 1355 | static int guc_exec_queue_suspend(struct xe_exec_queue *q) |
dd08ebf6 | 1356 | { |
9b9529ce | 1357 | struct xe_sched_msg *msg = q->guc->static_msgs + STATIC_MSG_SUSPEND; |
dd08ebf6 | 1358 | |
9b9529ce | 1359 | if (exec_queue_killed_or_banned(q) || q->guc->suspend_pending) |
dd08ebf6 MB |
1360 | return -EINVAL; |
1361 | ||
9b9529ce FD |
1362 | q->guc->suspend_pending = true; |
1363 | guc_exec_queue_add_msg(q, msg, SUSPEND); | |
dd08ebf6 MB |
1364 | |
1365 | return 0; | |
1366 | } | |
1367 | ||
9b9529ce | 1368 | static void guc_exec_queue_suspend_wait(struct xe_exec_queue *q) |
dd08ebf6 | 1369 | { |
9b9529ce | 1370 | struct xe_guc *guc = exec_queue_to_guc(q); |
dd08ebf6 | 1371 | |
9b9529ce | 1372 | wait_event(q->guc->suspend_wait, !q->guc->suspend_pending || |
dd08ebf6 MB |
1373 | guc_read_stopped(guc)); |
1374 | } | |
1375 | ||
9b9529ce | 1376 | static void guc_exec_queue_resume(struct xe_exec_queue *q) |
dd08ebf6 | 1377 | { |
9b9529ce | 1378 | struct xe_sched_msg *msg = q->guc->static_msgs + STATIC_MSG_RESUME; |
c73acc1e FD |
1379 | struct xe_guc *guc = exec_queue_to_guc(q); |
1380 | struct xe_device *xe = guc_to_xe(guc); | |
dd08ebf6 | 1381 | |
c73acc1e | 1382 | xe_assert(xe, !q->guc->suspend_pending); |
dd08ebf6 | 1383 | |
9b9529ce | 1384 | guc_exec_queue_add_msg(q, msg, RESUME); |
dd08ebf6 MB |
1385 | } |
1386 | ||
e670f0b4 BK |
1387 | static bool guc_exec_queue_reset_status(struct xe_exec_queue *q) |
1388 | { | |
1389 | return exec_queue_reset(q); | |
1390 | } | |
1391 | ||
dd08ebf6 MB |
1392 | /* |
1393 | * All of these functions are an abstraction layer which other parts of XE can | |
1394 | * use to trap into the GuC backend. All of these functions, aside from init, | |
1395 | * really shouldn't do much other than trap into the DRM scheduler which | |
1396 | * synchronizes these operations. | |
1397 | */ | |
9b9529ce FD |
1398 | static const struct xe_exec_queue_ops guc_exec_queue_ops = { |
1399 | .init = guc_exec_queue_init, | |
1400 | .kill = guc_exec_queue_kill, | |
1401 | .fini = guc_exec_queue_fini, | |
1402 | .set_priority = guc_exec_queue_set_priority, | |
1403 | .set_timeslice = guc_exec_queue_set_timeslice, | |
1404 | .set_preempt_timeout = guc_exec_queue_set_preempt_timeout, | |
9b9529ce FD |
1405 | .suspend = guc_exec_queue_suspend, |
1406 | .suspend_wait = guc_exec_queue_suspend_wait, | |
1407 | .resume = guc_exec_queue_resume, | |
e670f0b4 | 1408 | .reset_status = guc_exec_queue_reset_status, |
dd08ebf6 MB |
1409 | }; |
1410 | ||
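/*
 * A minimal sketch, assuming a hypothetical caller elsewhere in Xe whose
 * queue's ops table points at guc_exec_queue_ops above; the helper name is
 * an assumption. suspend() queues the static SUSPEND message,
 * suspend_wait() blocks until the suspend completes (or a GuC stop releases
 * the waiters), and resume() may only be called once no suspend is pending.
 */
static int __maybe_unused example_suspend_cycle(struct xe_exec_queue *q)
{
	int err;

	err = q->ops->suspend(q);
	if (err)
		return err;	/* killed, banned, or suspend already pending */

	q->ops->suspend_wait(q);

	/* ... work that requires the queue to be scheduled out ... */

	q->ops->resume(q);

	return 0;
}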
9b9529ce | 1411 | static void guc_exec_queue_stop(struct xe_guc *guc, struct xe_exec_queue *q) |
dd08ebf6 | 1412 | { |
9b9529ce | 1413 | struct xe_gpu_scheduler *sched = &q->guc->sched; |
dd08ebf6 MB |
1414 | |
1415 | /* Stop scheduling + flush any DRM scheduler operations */ | |
1416 | xe_sched_submission_stop(sched); | |
1417 | ||
1418 | /* Clean up lost G2H + reset engine state */ | |
9b9529ce FD |
1419 | if (exec_queue_registered(q)) { |
1420 | if ((exec_queue_banned(q) && exec_queue_destroyed(q)) || | |
1421 | xe_exec_queue_is_lr(q)) | |
1422 | xe_exec_queue_put(q); | |
1423 | else if (exec_queue_destroyed(q)) | |
1424 | __guc_exec_queue_fini(guc, q); | |
dd08ebf6 | 1425 | } |
9b9529ce FD |
1426 | if (q->guc->suspend_pending) { |
1427 | set_exec_queue_suspended(q); | |
1428 | suspend_fence_signal(q); | |
dd08ebf6 | 1429 | } |
9b9529ce FD |
1430 | atomic_and(EXEC_QUEUE_STATE_DESTROYED | ENGINE_STATE_SUSPENDED, |
1431 | &q->guc->state); | |
1432 | q->guc->resume_time = 0; | |
1433 | trace_xe_exec_queue_stop(q); | |
dd08ebf6 MB |
1434 | |
1435 | /* | |
1436 | * Ban any engine (aside from kernel engines and engines used for VM ops) |
1437 | * that has a started but not yet completed job, or whose jobs have gone |
1438 | * through a GT reset more than twice. |
1439 | */ | |
9b9529ce | 1440 | if (!(q->flags & (EXEC_QUEUE_FLAG_KERNEL | EXEC_QUEUE_FLAG_VM))) { |
dd08ebf6 MB |
1441 | struct xe_sched_job *job = xe_sched_first_pending_job(sched); |
1442 | ||
1443 | if (job) { | |
1444 | if ((xe_sched_job_started(job) && | |
1445 | !xe_sched_job_completed(job)) || | |
1446 | xe_sched_invalidate_job(job, 2)) { | |
1447 | trace_xe_sched_job_ban(job); | |
9b9529ce FD |
1448 | xe_sched_tdr_queue_imm(&q->guc->sched); |
1449 | set_exec_queue_banned(q); | |
dd08ebf6 MB |
1450 | } |
1451 | } | |
1452 | } | |
1453 | } | |
1454 | ||
1455 | int xe_guc_submit_reset_prepare(struct xe_guc *guc) | |
1456 | { | |
1457 | int ret; | |
1458 | ||
1459 | /* | |
1460 | * Using an atomic here rather than submission_state.lock as this | |
1461 | * function can be called while holding the CT lock (engine reset | |
1462 | * failure). submission_state.lock needs the CT lock to resubmit jobs. | |
1463 | * An atomic is not ideal, but it works to protect against a concurrent |
1464 | * reset and to release any TDRs waiting on guc->submission_state.stopped. |
1465 | */ | |
1466 | ret = atomic_fetch_or(1, &guc->submission_state.stopped); | |
1467 | smp_wmb(); | |
1468 | wake_up_all(&guc->ct.wq); | |
1469 | ||
1470 | return ret; | |
1471 | } | |
1472 | ||
1473 | void xe_guc_submit_reset_wait(struct xe_guc *guc) | |
1474 | { | |
1475 | wait_event(guc->ct.wq, !guc_read_stopped(guc)); | |
1476 | } | |
1477 | ||
1478 | int xe_guc_submit_stop(struct xe_guc *guc) | |
1479 | { | |
9b9529ce | 1480 | struct xe_exec_queue *q; |
dd08ebf6 | 1481 | unsigned long index; |
c73acc1e | 1482 | struct xe_device *xe = guc_to_xe(guc); |
dd08ebf6 | 1483 | |
c73acc1e | 1484 | xe_assert(xe, guc_read_stopped(guc) == 1); |
dd08ebf6 MB |
1485 | |
1486 | mutex_lock(&guc->submission_state.lock); | |
1487 | ||
9b9529ce FD |
1488 | xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) |
1489 | guc_exec_queue_stop(guc, q); | |
dd08ebf6 MB |
1490 | |
1491 | mutex_unlock(&guc->submission_state.lock); | |
1492 | ||
1493 | /* | |
1494 | * No one can enter the backend at this point, aside from new engine | |
1495 | * creation, which is protected by guc->submission_state.lock. |
1496 | */ | |
1497 | ||
1498 | return 0; | |
1499 | } | |
1500 | ||
9b9529ce | 1501 | static void guc_exec_queue_start(struct xe_exec_queue *q) |
dd08ebf6 | 1502 | { |
9b9529ce | 1503 | struct xe_gpu_scheduler *sched = &q->guc->sched; |
dd08ebf6 | 1504 | |
9b9529ce | 1505 | if (!exec_queue_killed_or_banned(q)) { |
dd08ebf6 MB |
1506 | int i; |
1507 | ||
9b9529ce FD |
1508 | trace_xe_exec_queue_resubmit(q); |
1509 | for (i = 0; i < q->width; ++i) | |
1510 | xe_lrc_set_ring_head(q->lrc + i, q->lrc[i].ring.tail); | |
dd08ebf6 MB |
1511 | xe_sched_resubmit_jobs(sched); |
1512 | } | |
1513 | ||
1514 | xe_sched_submission_start(sched); | |
1515 | } | |
1516 | ||
1517 | int xe_guc_submit_start(struct xe_guc *guc) | |
1518 | { | |
9b9529ce | 1519 | struct xe_exec_queue *q; |
dd08ebf6 | 1520 | unsigned long index; |
c73acc1e | 1521 | struct xe_device *xe = guc_to_xe(guc); |
dd08ebf6 | 1522 | |
c73acc1e | 1523 | xe_assert(xe, guc_read_stopped(guc) == 1); |
dd08ebf6 MB |
1524 | |
1525 | mutex_lock(&guc->submission_state.lock); | |
1526 | atomic_dec(&guc->submission_state.stopped); | |
9b9529ce FD |
1527 | xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) |
1528 | guc_exec_queue_start(q); | |
dd08ebf6 MB |
1529 | mutex_unlock(&guc->submission_state.lock); |
1530 | ||
1531 | wake_up_all(&guc->ct.wq); | |
1532 | ||
1533 | return 0; | |
1534 | } | |
1535 | ||
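/*
 * A minimal sketch of how the reset entry points above pair up, assuming a
 * simplified flow; the real sequencing lives in the GT reset path and
 * interleaves other teardown steps. Other threads can call
 * xe_guc_submit_reset_wait() to block until the reset completes.
 */
static void __maybe_unused example_reset_flow(struct xe_guc *guc)
{
	/* Mark submission stopped and wake anyone waiting on guc->ct.wq */
	xe_guc_submit_reset_prepare(guc);

	/* Stop every scheduler and clean up lost G2H / engine state */
	xe_guc_submit_stop(guc);

	/* ... reset the GT and reload the GuC here ... */

	/* Reset ring heads, resubmit pending jobs, restart the schedulers */
	xe_guc_submit_start(guc);
}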
9b9529ce FD |
1536 | static struct xe_exec_queue * |
1537 | g2h_exec_queue_lookup(struct xe_guc *guc, u32 guc_id) | |
dd08ebf6 MB |
1538 | { |
1539 | struct xe_device *xe = guc_to_xe(guc); | |
9b9529ce | 1540 | struct xe_exec_queue *q; |
dd08ebf6 MB |
1541 | |
1542 | if (unlikely(guc_id >= GUC_ID_MAX)) { | |
1543 | drm_err(&xe->drm, "Invalid guc_id %u", guc_id); | |
1544 | return NULL; | |
1545 | } | |
1546 | ||
9b9529ce FD |
1547 | q = xa_load(&guc->submission_state.exec_queue_lookup, guc_id); |
1548 | if (unlikely(!q)) { | |
dd08ebf6 MB |
1549 | drm_err(&xe->drm, "Not engine present for guc_id %u", guc_id); |
1550 | return NULL; | |
1551 | } | |
1552 | ||
cb90d469 DCS |
1553 | xe_assert(xe, guc_id >= q->guc->id); |
1554 | xe_assert(xe, guc_id < (q->guc->id + q->width)); | |
dd08ebf6 | 1555 | |
9b9529ce | 1556 | return q; |
dd08ebf6 MB |
1557 | } |
1558 | ||
9b9529ce | 1559 | static void deregister_exec_queue(struct xe_guc *guc, struct xe_exec_queue *q) |
dd08ebf6 MB |
1560 | { |
1561 | u32 action[] = { | |
1562 | XE_GUC_ACTION_DEREGISTER_CONTEXT, | |
9b9529ce | 1563 | q->guc->id, |
dd08ebf6 MB |
1564 | }; |
1565 | ||
9b9529ce | 1566 | trace_xe_exec_queue_deregister(q); |
dd08ebf6 MB |
1567 | |
1568 | xe_guc_ct_send_g2h_handler(&guc->ct, action, ARRAY_SIZE(action)); | |
1569 | } | |
1570 | ||
1571 | int xe_guc_sched_done_handler(struct xe_guc *guc, u32 *msg, u32 len) | |
1572 | { | |
1573 | struct xe_device *xe = guc_to_xe(guc); | |
9b9529ce | 1574 | struct xe_exec_queue *q; |
dd08ebf6 MB |
1575 | u32 guc_id = msg[0]; |
1576 | ||
1577 | if (unlikely(len < 2)) { | |
1578 | drm_err(&xe->drm, "Invalid length %u", len); | |
1579 | return -EPROTO; | |
1580 | } | |
1581 | ||
9b9529ce FD |
1582 | q = g2h_exec_queue_lookup(guc, guc_id); |
1583 | if (unlikely(!q)) | |
dd08ebf6 MB |
1584 | return -EPROTO; |
1585 | ||
9b9529ce FD |
1586 | if (unlikely(!exec_queue_pending_enable(q) && |
1587 | !exec_queue_pending_disable(q))) { | |
dd08ebf6 | 1588 | drm_err(&xe->drm, "Unexpected engine state 0x%04x", |
9b9529ce | 1589 | atomic_read(&q->guc->state)); |
dd08ebf6 MB |
1590 | return -EPROTO; |
1591 | } | |
1592 | ||
9b9529ce | 1593 | trace_xe_exec_queue_scheduling_done(q); |
dd08ebf6 | 1594 | |
9b9529ce FD |
1595 | if (exec_queue_pending_enable(q)) { |
1596 | q->guc->resume_time = ktime_get(); | |
1597 | clear_exec_queue_pending_enable(q); | |
dd08ebf6 MB |
1598 | smp_wmb(); |
1599 | wake_up_all(&guc->ct.wq); | |
1600 | } else { | |
9b9529ce FD |
1601 | clear_exec_queue_pending_disable(q); |
1602 | if (q->guc->suspend_pending) { | |
1603 | suspend_fence_signal(q); | |
dd08ebf6 | 1604 | } else { |
9b9529ce | 1605 | if (exec_queue_banned(q)) { |
dd08ebf6 MB |
1606 | smp_wmb(); |
1607 | wake_up_all(&guc->ct.wq); | |
1608 | } | |
9b9529ce | 1609 | deregister_exec_queue(guc, q); |
dd08ebf6 MB |
1610 | } |
1611 | } | |
1612 | ||
1613 | return 0; | |
1614 | } | |
1615 | ||
1616 | int xe_guc_deregister_done_handler(struct xe_guc *guc, u32 *msg, u32 len) | |
1617 | { | |
1618 | struct xe_device *xe = guc_to_xe(guc); | |
9b9529ce | 1619 | struct xe_exec_queue *q; |
dd08ebf6 MB |
1620 | u32 guc_id = msg[0]; |
1621 | ||
1622 | if (unlikely(len < 1)) { | |
1623 | drm_err(&xe->drm, "Invalid length %u", len); | |
1624 | return -EPROTO; | |
1625 | } | |
1626 | ||
9b9529ce FD |
1627 | q = g2h_exec_queue_lookup(guc, guc_id); |
1628 | if (unlikely(!q)) | |
dd08ebf6 MB |
1629 | return -EPROTO; |
1630 | ||
9b9529ce FD |
1631 | if (!exec_queue_destroyed(q) || exec_queue_pending_disable(q) || |
1632 | exec_queue_pending_enable(q) || exec_queue_enabled(q)) { | |
dd08ebf6 | 1633 | drm_err(&xe->drm, "Unexpected engine state 0x%04x", |
9b9529ce | 1634 | atomic_read(&q->guc->state)); |
dd08ebf6 MB |
1635 | return -EPROTO; |
1636 | } | |
1637 | ||
9b9529ce | 1638 | trace_xe_exec_queue_deregister_done(q); |
dd08ebf6 | 1639 | |
9b9529ce | 1640 | clear_exec_queue_registered(q); |
8ae8a2e8 | 1641 | |
9b9529ce FD |
1642 | if (exec_queue_banned(q) || xe_exec_queue_is_lr(q)) |
1643 | xe_exec_queue_put(q); | |
dd08ebf6 | 1644 | else |
9b9529ce | 1645 | __guc_exec_queue_fini(guc, q); |
dd08ebf6 MB |
1646 | |
1647 | return 0; | |
1648 | } | |
1649 | ||
9b9529ce | 1650 | int xe_guc_exec_queue_reset_handler(struct xe_guc *guc, u32 *msg, u32 len) |
dd08ebf6 MB |
1651 | { |
1652 | struct xe_device *xe = guc_to_xe(guc); | |
9b9529ce | 1653 | struct xe_exec_queue *q; |
dd08ebf6 MB |
1654 | u32 guc_id = msg[0]; |
1655 | ||
1656 | if (unlikely(len < 1)) { | |
1657 | drm_err(&xe->drm, "Invalid length %u", len); | |
1658 | return -EPROTO; | |
1659 | } | |
1660 | ||
9b9529ce FD |
1661 | q = g2h_exec_queue_lookup(guc, guc_id); |
1662 | if (unlikely(!q)) | |
dd08ebf6 MB |
1663 | return -EPROTO; |
1664 | ||
1665 | drm_info(&xe->drm, "Engine reset: guc_id=%d", guc_id); | |
1666 | ||
1667 | /* FIXME: Do error capture, most likely async */ | |
1668 | ||
9b9529ce | 1669 | trace_xe_exec_queue_reset(q); |
dd08ebf6 MB |
1670 | |
1671 | /* | |
1672 | * A banned engine is a NOP at this point (came from | |
9b9529ce | 1673 | * guc_exec_queue_timedout_job). Otherwise, kick the DRM scheduler to |
dd08ebf6 | 1674 | * cancel jobs by setting the job timeout to the minimum value, which in |
9b9529ce | 1675 | * turn triggers guc_exec_queue_timedout_job. |
dd08ebf6 | 1676 | */ |
9b9529ce FD |
1677 | set_exec_queue_reset(q); |
1678 | if (!exec_queue_banned(q)) | |
1679 | xe_guc_exec_queue_trigger_cleanup(q); | |
dd08ebf6 MB |
1680 | |
1681 | return 0; | |
1682 | } | |
1683 | ||
9b9529ce FD |
1684 | int xe_guc_exec_queue_memory_cat_error_handler(struct xe_guc *guc, u32 *msg, |
1685 | u32 len) | |
dd08ebf6 MB |
1686 | { |
1687 | struct xe_device *xe = guc_to_xe(guc); | |
9b9529ce | 1688 | struct xe_exec_queue *q; |
dd08ebf6 MB |
1689 | u32 guc_id = msg[0]; |
1690 | ||
1691 | if (unlikely(len < 1)) { | |
1692 | drm_err(&xe->drm, "Invalid length %u", len); | |
1693 | return -EPROTO; | |
1694 | } | |
1695 | ||
9b9529ce FD |
1696 | q = g2h_exec_queue_lookup(guc, guc_id); |
1697 | if (unlikely(!q)) | |
dd08ebf6 MB |
1698 | return -EPROTO; |
1699 | ||
17d28aa8 | 1700 | drm_dbg(&xe->drm, "Engine memory cat error: guc_id=%d", guc_id); |
9b9529ce | 1701 | trace_xe_exec_queue_memory_cat_error(q); |
dd08ebf6 MB |
1702 | |
1703 | /* Treat the same as engine reset */ | |
9b9529ce FD |
1704 | set_exec_queue_reset(q); |
1705 | if (!exec_queue_banned(q)) | |
1706 | xe_guc_exec_queue_trigger_cleanup(q); | |
dd08ebf6 MB |
1707 | |
1708 | return 0; | |
1709 | } | |
1710 | ||
9b9529ce | 1711 | int xe_guc_exec_queue_reset_failure_handler(struct xe_guc *guc, u32 *msg, u32 len) |
dd08ebf6 MB |
1712 | { |
1713 | struct xe_device *xe = guc_to_xe(guc); | |
1714 | u8 guc_class, instance; | |
1715 | u32 reason; | |
1716 | ||
1717 | if (unlikely(len != 3)) { | |
1718 | drm_err(&xe->drm, "Invalid length %u", len); | |
1719 | return -EPROTO; | |
1720 | } | |
1721 | ||
1722 | guc_class = msg[0]; | |
1723 | instance = msg[1]; | |
1724 | reason = msg[2]; | |
1725 | ||
1726 | /* Unexpected failure of a hardware feature, log an actual error */ | |
1727 | drm_err(&xe->drm, "GuC engine reset request failed on %d:%d because 0x%08X", | |
1728 | guc_class, instance, reason); | |
1729 | ||
1730 | xe_gt_reset_async(guc_to_gt(guc)); | |
1731 | ||
1732 | return 0; | |
1733 | } | |
1734 | ||
bbdf97c1 | 1735 | static void |
9b9529ce FD |
1736 | guc_exec_queue_wq_snapshot_capture(struct xe_exec_queue *q, |
1737 | struct xe_guc_submit_exec_queue_snapshot *snapshot) | |
dd08ebf6 | 1738 | { |
9b9529ce | 1739 | struct xe_guc *guc = exec_queue_to_guc(q); |
dd08ebf6 | 1740 | struct xe_device *xe = guc_to_xe(guc); |
9b9529ce | 1741 | struct iosys_map map = xe_lrc_parallel_map(q->lrc); |
dd08ebf6 MB |
1742 | int i; |
1743 | ||
9b9529ce FD |
1744 | snapshot->guc.wqi_head = q->guc->wqi_head; |
1745 | snapshot->guc.wqi_tail = q->guc->wqi_tail; | |
bbdf97c1 RV |
1746 | snapshot->parallel.wq_desc.head = parallel_read(xe, map, wq_desc.head); |
1747 | snapshot->parallel.wq_desc.tail = parallel_read(xe, map, wq_desc.tail); | |
1748 | snapshot->parallel.wq_desc.status = parallel_read(xe, map, | |
1749 | wq_desc.wq_status); | |
1750 | ||
1751 | if (snapshot->parallel.wq_desc.head != | |
1752 | snapshot->parallel.wq_desc.tail) { | |
1753 | for (i = snapshot->parallel.wq_desc.head; | |
1754 | i != snapshot->parallel.wq_desc.tail; | |
1755 | i = (i + sizeof(u32)) % WQ_SIZE) | |
1756 | snapshot->parallel.wq[i / sizeof(u32)] = | |
1757 | parallel_read(xe, map, wq[i / sizeof(u32)]); | |
1758 | } | |
1759 | } | |
1760 | ||
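/*
 * A minimal, self-contained sketch of the circular walk used above, assuming
 * an arbitrary WQ_SIZE; the helper is illustrative only. Indices are byte
 * offsets stepped one u32 at a time and wrapped at WQ_SIZE, so a tail that
 * has wrapped below the head is handled without any special casing.
 */
static void __maybe_unused example_wq_walk(struct drm_printer *p,
					   const u32 *wq, u32 head, u32 tail)
{
	u32 i;

	for (i = head; i != tail; i = (i + sizeof(u32)) % WQ_SIZE)
		drm_printf(p, "WQ[%zu]: 0x%08x\n", i / sizeof(u32),
			   wq[i / sizeof(u32)]);
}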
1761 | static void | |
9b9529ce FD |
1762 | guc_exec_queue_wq_snapshot_print(struct xe_guc_submit_exec_queue_snapshot *snapshot, |
1763 | struct drm_printer *p) | |
bbdf97c1 RV |
1764 | { |
1765 | int i; | |
1766 | ||
dd08ebf6 | 1767 | drm_printf(p, "\tWQ head: %u (internal), %d (memory)\n", |
bbdf97c1 | 1768 | snapshot->guc.wqi_head, snapshot->parallel.wq_desc.head); |
dd08ebf6 | 1769 | drm_printf(p, "\tWQ tail: %u (internal), %d (memory)\n", |
bbdf97c1 RV |
1770 | snapshot->guc.wqi_tail, snapshot->parallel.wq_desc.tail); |
1771 | drm_printf(p, "\tWQ status: %u\n", snapshot->parallel.wq_desc.status); | |
1772 | ||
1773 | if (snapshot->parallel.wq_desc.head != | |
1774 | snapshot->parallel.wq_desc.tail) { | |
1775 | for (i = snapshot->parallel.wq_desc.head; | |
1776 | i != snapshot->parallel.wq_desc.tail; | |
dd08ebf6 | 1777 | i = (i + sizeof(u32)) % WQ_SIZE) |
857912c3 | 1778 | drm_printf(p, "\tWQ[%zu]: 0x%08x\n", i / sizeof(u32), |
bbdf97c1 | 1779 | snapshot->parallel.wq[i / sizeof(u32)]); |
dd08ebf6 MB |
1780 | } |
1781 | } | |
1782 | ||
bbdf97c1 | 1783 | /** |
9b9529ce | 1784 | * xe_guc_exec_queue_snapshot_capture - Take a quick snapshot of the GuC Engine. |
98fefec8 | 1785 | * @job: faulty Xe scheduled job. |
bbdf97c1 RV |
1786 | * |
1787 | * The snapshot can be printed out at a later stage, e.g. during |
1788 | * dev_coredump analysis. |
1789 | * | |
1790 | * Returns: a GuC Submit Engine snapshot object that must be freed by the | |
9b9529ce | 1791 | * caller, using `xe_guc_exec_queue_snapshot_free`. |
bbdf97c1 | 1792 | */ |
9b9529ce | 1793 | struct xe_guc_submit_exec_queue_snapshot * |
98fefec8 | 1794 | xe_guc_exec_queue_snapshot_capture(struct xe_sched_job *job) |
dd08ebf6 | 1795 | { |
98fefec8 | 1796 | struct xe_exec_queue *q = job->q; |
9b9529ce | 1797 | struct xe_gpu_scheduler *sched = &q->guc->sched; |
9b9529ce | 1798 | struct xe_guc_submit_exec_queue_snapshot *snapshot; |
bbdf97c1 RV |
1799 | int i; |
1800 | ||
1801 | snapshot = kzalloc(sizeof(*snapshot), GFP_ATOMIC); | |
1802 | ||
8491b0ef | 1803 | if (!snapshot) |
bbdf97c1 | 1804 | return NULL; |
bbdf97c1 | 1805 | |
9b9529ce FD |
1806 | snapshot->guc.id = q->guc->id; |
1807 | memcpy(&snapshot->name, &q->name, sizeof(snapshot->name)); | |
1808 | snapshot->class = q->class; | |
1809 | snapshot->logical_mask = q->logical_mask; | |
1810 | snapshot->width = q->width; | |
1811 | snapshot->refcount = kref_read(&q->refcount); | |
bbdf97c1 | 1812 | snapshot->sched_timeout = sched->base.timeout; |
9b9529ce | 1813 | snapshot->sched_props.timeslice_us = q->sched_props.timeslice_us; |
bbdf97c1 | 1814 | snapshot->sched_props.preempt_timeout_us = |
9b9529ce | 1815 | q->sched_props.preempt_timeout_us; |
bbdf97c1 | 1816 | |
9b9529ce | 1817 | snapshot->lrc = kmalloc_array(q->width, sizeof(struct lrc_snapshot), |
bbdf97c1 RV |
1818 | GFP_ATOMIC); |
1819 | ||
8491b0ef | 1820 | if (snapshot->lrc) { |
9b9529ce FD |
1821 | for (i = 0; i < q->width; ++i) { |
1822 | struct xe_lrc *lrc = q->lrc + i; | |
bbdf97c1 RV |
1823 | |
1824 | snapshot->lrc[i].context_desc = | |
1825 | lower_32_bits(xe_lrc_ggtt_addr(lrc)); | |
1826 | snapshot->lrc[i].head = xe_lrc_ring_head(lrc); | |
1827 | snapshot->lrc[i].tail.internal = lrc->ring.tail; | |
1828 | snapshot->lrc[i].tail.memory = | |
1829 | xe_lrc_read_ctx_reg(lrc, CTX_RING_TAIL); | |
1830 | snapshot->lrc[i].start_seqno = xe_lrc_start_seqno(lrc); | |
1831 | snapshot->lrc[i].seqno = xe_lrc_seqno(lrc); | |
1832 | } | |
1833 | } | |
1834 | ||
9b9529ce FD |
1835 | snapshot->schedule_state = atomic_read(&q->guc->state); |
1836 | snapshot->exec_queue_flags = q->flags; | |
bbdf97c1 | 1837 | |
9b9529ce | 1838 | snapshot->parallel_execution = xe_exec_queue_is_parallel(q); |
bbdf97c1 | 1839 | if (snapshot->parallel_execution) |
9b9529ce | 1840 | guc_exec_queue_wq_snapshot_capture(q, snapshot); |
bbdf97c1 RV |
1841 | |
1842 | spin_lock(&sched->base.job_list_lock); | |
1843 | snapshot->pending_list_size = list_count_nodes(&sched->base.pending_list); | |
1844 | snapshot->pending_list = kmalloc_array(snapshot->pending_list_size, | |
1845 | sizeof(struct pending_list_snapshot), | |
1846 | GFP_ATOMIC); | |
1847 | ||
8491b0ef | 1848 | if (snapshot->pending_list) { |
98fefec8 JRS |
1849 | struct xe_sched_job *job_iter; |
1850 | ||
bbdf97c1 | 1851 | i = 0; |
98fefec8 | 1852 | list_for_each_entry(job_iter, &sched->base.pending_list, drm.list) { |
bbdf97c1 | 1853 | snapshot->pending_list[i].seqno = |
98fefec8 | 1854 | xe_sched_job_seqno(job_iter); |
bbdf97c1 | 1855 | snapshot->pending_list[i].fence = |
98fefec8 | 1856 | dma_fence_is_signaled(job_iter->fence) ? 1 : 0; |
bbdf97c1 | 1857 | snapshot->pending_list[i].finished = |
98fefec8 | 1858 | dma_fence_is_signaled(&job_iter->drm.s_fence->finished) |
bbdf97c1 RV |
1859 | ? 1 : 0; |
1860 | i++; | |
1861 | } | |
1862 | } | |
1863 | ||
1864 | spin_unlock(&sched->base.job_list_lock); | |
1865 | ||
1866 | return snapshot; | |
1867 | } | |
1868 | ||
1869 | /** | |
9b9529ce | 1870 | * xe_guc_exec_queue_snapshot_print - Print out a given GuC Engine snapshot. |
bbdf97c1 RV |
1871 | * @snapshot: GuC Submit Engine snapshot object. |
1872 | * @p: drm_printer where it will be printed out. | |
1873 | * | |
1874 | * This function prints out a given GuC Submit Engine snapshot object. | |
1875 | */ | |
1876 | void | |
9b9529ce FD |
1877 | xe_guc_exec_queue_snapshot_print(struct xe_guc_submit_exec_queue_snapshot *snapshot, |
1878 | struct drm_printer *p) | |
bbdf97c1 | 1879 | { |
dd08ebf6 MB |
1880 | int i; |
1881 | ||
bbdf97c1 RV |
1882 | if (!snapshot) |
1883 | return; | |
1884 | ||
1885 | drm_printf(p, "\nGuC ID: %d\n", snapshot->guc.id); | |
1886 | drm_printf(p, "\tName: %s\n", snapshot->name); | |
1887 | drm_printf(p, "\tClass: %d\n", snapshot->class); | |
1888 | drm_printf(p, "\tLogical mask: 0x%x\n", snapshot->logical_mask); | |
1889 | drm_printf(p, "\tWidth: %d\n", snapshot->width); | |
1890 | drm_printf(p, "\tRef: %d\n", snapshot->refcount); | |
1891 | drm_printf(p, "\tTimeout: %ld (ms)\n", snapshot->sched_timeout); | |
1892 | drm_printf(p, "\tTimeslice: %u (us)\n", | |
1893 | snapshot->sched_props.timeslice_us); | |
dd08ebf6 | 1894 | drm_printf(p, "\tPreempt timeout: %u (us)\n", |
bbdf97c1 | 1895 | snapshot->sched_props.preempt_timeout_us); |
dd08ebf6 | 1896 | |
bbdf97c1 | 1897 | for (i = 0; snapshot->lrc && i < snapshot->width; ++i) { |
dd08ebf6 | 1898 | drm_printf(p, "\tHW Context Desc: 0x%08x\n", |
bbdf97c1 | 1899 | snapshot->lrc[i].context_desc); |
dd08ebf6 | 1900 | drm_printf(p, "\tLRC Head: (memory) %u\n", |
bbdf97c1 | 1901 | snapshot->lrc[i].head); |
dd08ebf6 | 1902 | drm_printf(p, "\tLRC Tail: (internal) %u, (memory) %u\n", |
bbdf97c1 RV |
1903 | snapshot->lrc[i].tail.internal, |
1904 | snapshot->lrc[i].tail.memory); | |
dd08ebf6 | 1905 | drm_printf(p, "\tStart seqno: (memory) %d\n", |
bbdf97c1 RV |
1906 | snapshot->lrc[i].start_seqno); |
1907 | drm_printf(p, "\tSeqno: (memory) %d\n", snapshot->lrc[i].seqno); | |
dd08ebf6 | 1908 | } |
bbdf97c1 | 1909 | drm_printf(p, "\tSchedule State: 0x%x\n", snapshot->schedule_state); |
9b9529ce | 1910 | drm_printf(p, "\tFlags: 0x%lx\n", snapshot->exec_queue_flags); |
dd08ebf6 | 1911 | |
bbdf97c1 | 1912 | if (snapshot->parallel_execution) |
9b9529ce | 1913 | guc_exec_queue_wq_snapshot_print(snapshot, p); |
1825c492 | 1914 | |
bbdf97c1 RV |
1915 | for (i = 0; snapshot->pending_list && i < snapshot->pending_list_size; |
1916 | i++) | |
dd08ebf6 | 1917 | drm_printf(p, "\tJob: seqno=%d, fence=%d, finished=%d\n", |
bbdf97c1 RV |
1918 | snapshot->pending_list[i].seqno, |
1919 | snapshot->pending_list[i].fence, | |
1920 | snapshot->pending_list[i].finished); | |
1921 | } | |
1922 | ||
1923 | /** | |
9b9529ce | 1924 | * xe_guc_exec_queue_snapshot_free - Free all allocated objects for a given |
bbdf97c1 RV |
1925 | * snapshot. |
1926 | * @snapshot: GuC Submit Engine snapshot object. | |
1927 | * | |
1928 | * This function frees all the memory that was allocated at capture |
1929 | * time. |
1930 | */ | |
9b9529ce | 1931 | void xe_guc_exec_queue_snapshot_free(struct xe_guc_submit_exec_queue_snapshot *snapshot) |
bbdf97c1 RV |
1932 | { |
1933 | if (!snapshot) | |
1934 | return; | |
1935 | ||
1936 | kfree(snapshot->lrc); | |
1937 | kfree(snapshot->pending_list); | |
1938 | kfree(snapshot); | |
dd08ebf6 MB |
1939 | } |
1940 | ||
9b9529ce | 1941 | static void guc_exec_queue_print(struct xe_exec_queue *q, struct drm_printer *p) |
bbdf97c1 | 1942 | { |
9b9529ce | 1943 | struct xe_guc_submit_exec_queue_snapshot *snapshot; |
98fefec8 JRS |
1944 | struct xe_gpu_scheduler *sched = &q->guc->sched; |
1945 | struct xe_sched_job *job; | |
1946 | bool found = false; | |
1947 | ||
1948 | spin_lock(&sched->base.job_list_lock); | |
1949 | list_for_each_entry(job, &sched->base.pending_list, drm.list) { | |
1950 | if (job->q == q) { | |
1951 | xe_sched_job_get(job); | |
1952 | found = true; | |
1953 | break; | |
1954 | } | |
1955 | } | |
1956 | spin_unlock(&sched->base.job_list_lock); | |
bbdf97c1 | 1957 | |
98fefec8 JRS |
1958 | if (!found) |
1959 | return; | |
1960 | ||
1961 | snapshot = xe_guc_exec_queue_snapshot_capture(job); | |
9b9529ce FD |
1962 | xe_guc_exec_queue_snapshot_print(snapshot, p); |
1963 | xe_guc_exec_queue_snapshot_free(snapshot); | |
98fefec8 JRS |
1964 | |
1965 | xe_sched_job_put(job); | |
bbdf97c1 RV |
1966 | } |
1967 | ||
1968 | /** | |
1969 | * xe_guc_submit_print - GuC Submit Print. | |
1970 | * @guc: GuC. | |
1971 | * @p: drm_printer where it will be printed out. | |
1972 | * | |
1973 | * This function captures and prints snapshots of **all** GuC Engines. |
1974 | */ | |
dd08ebf6 MB |
1975 | void xe_guc_submit_print(struct xe_guc *guc, struct drm_printer *p) |
1976 | { | |
9b9529ce | 1977 | struct xe_exec_queue *q; |
dd08ebf6 MB |
1978 | unsigned long index; |
1979 | ||
c4991ee0 | 1980 | if (!xe_device_uc_enabled(guc_to_xe(guc))) |
dd08ebf6 MB |
1981 | return; |
1982 | ||
1983 | mutex_lock(&guc->submission_state.lock); | |
9b9529ce FD |
1984 | xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) |
1985 | guc_exec_queue_print(q, p); | |
dd08ebf6 MB |
1986 | mutex_unlock(&guc->submission_state.lock); |
1987 | } |