1 | // SPDX-License-Identifier: MIT |
2 | /* | |
3 | * Copyright © 2022 Intel Corporation | |
4 | */ | |
5 | ||
6 | #include <linux/bitfield.h> | |
7 | #include <linux/bitmap.h> | |
8 | #include <linux/circ_buf.h> | |
9 | #include <linux/delay.h> | |
10 | #include <linux/dma-fence-array.h> | |
11 | ||
12 | #include <drm/drm_managed.h> | |
13 | ||
14 | #include "xe_device.h" | |
15 | #include "xe_engine.h" | |
16 | #include "xe_guc.h" | |
17 | #include "xe_guc_ct.h" | |
18 | #include "xe_guc_engine_types.h" | |
19 | #include "xe_guc_submit.h" | |
20 | #include "xe_gt.h" | |
21 | #include "xe_force_wake.h" | |
22 | #include "xe_gpu_scheduler.h" | |
23 | #include "xe_hw_engine.h" | |
24 | #include "xe_hw_fence.h" | |
25 | #include "xe_lrc.h" | |
26 | #include "xe_macros.h" | |
27 | #include "xe_map.h" | |
28 | #include "xe_mocs.h" | |
29 | #include "xe_ring_ops_types.h" | |
30 | #include "xe_sched_job.h" | |
31 | #include "xe_trace.h" | |
32 | #include "xe_vm.h" | |
33 | ||
34 | #include "gt/intel_lrc_reg.h" | |
35 | ||
36 | static struct xe_gt * | |
37 | guc_to_gt(struct xe_guc *guc) | |
38 | { | |
39 | return container_of(guc, struct xe_gt, uc.guc); | |
40 | } | |
41 | ||
42 | static struct xe_device * | |
43 | guc_to_xe(struct xe_guc *guc) | |
44 | { | |
45 | return gt_to_xe(guc_to_gt(guc)); | |
46 | } | |
47 | ||
48 | static struct xe_guc * | |
49 | engine_to_guc(struct xe_engine *e) | |
50 | { | |
51 | return &e->gt->uc.guc; | |
52 | } | |
53 | ||
54 | /* | |
55 | * Helpers for engine state, using an atomic as some of the bits can transition | |
56 | * at the same time (e.g. a suspend can be happening at the same time as a | 
57 | * schedule engine done message is being processed). | 
58 | */ | |
59 | #define ENGINE_STATE_REGISTERED (1 << 0) | |
60 | #define ENGINE_STATE_ENABLED (1 << 1) | |
61 | #define ENGINE_STATE_PENDING_ENABLE (1 << 2) | |
62 | #define ENGINE_STATE_PENDING_DISABLE (1 << 3) | |
63 | #define ENGINE_STATE_DESTROYED (1 << 4) | |
64 | #define ENGINE_STATE_SUSPENDED (1 << 5) | |
65 | #define ENGINE_STATE_RESET (1 << 6) | |
66 | #define ENGINE_STATE_KILLED (1 << 7) | |
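/*
 * Illustrative sketch (not part of the driver) of why plain read-modify-write
 * on a u32 would be unsafe here. Assume a suspend request and a scheduling
 * done G2H race on the same engine:
 *
 *	CPU0 (suspend path)                 CPU1 (G2H handler)
 *	set_engine_suspended(e);            clear_engine_pending_enable(e);
 *	  atomic_or(SUSPENDED, state)         atomic_and(~PENDING_ENABLE, state)
 *
 * With atomic_or()/atomic_and() both updates land regardless of ordering; a
 * non-atomic read-modify-write could lose one of the two bits.
 */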
67 | ||
68 | static bool engine_registered(struct xe_engine *e) | |
69 | { | |
70 | return atomic_read(&e->guc->state) & ENGINE_STATE_REGISTERED; | |
71 | } | |
72 | ||
73 | static void set_engine_registered(struct xe_engine *e) | |
74 | { | |
75 | atomic_or(ENGINE_STATE_REGISTERED, &e->guc->state); | |
76 | } | |
77 | ||
78 | static void clear_engine_registered(struct xe_engine *e) | |
79 | { | |
80 | atomic_and(~ENGINE_STATE_REGISTERED, &e->guc->state); | |
81 | } | |
82 | ||
83 | static bool engine_enabled(struct xe_engine *e) | |
84 | { | |
85 | return atomic_read(&e->guc->state) & ENGINE_STATE_ENABLED; | |
86 | } | |
87 | ||
88 | static void set_engine_enabled(struct xe_engine *e) | |
89 | { | |
90 | atomic_or(ENGINE_STATE_ENABLED, &e->guc->state); | |
91 | } | |
92 | ||
93 | static void clear_engine_enabled(struct xe_engine *e) | |
94 | { | |
95 | atomic_and(~ENGINE_STATE_ENABLED, &e->guc->state); | |
96 | } | |
97 | ||
98 | static bool engine_pending_enable(struct xe_engine *e) | |
99 | { | |
100 | return atomic_read(&e->guc->state) & ENGINE_STATE_PENDING_ENABLE; | |
101 | } | |
102 | ||
103 | static void set_engine_pending_enable(struct xe_engine *e) | |
104 | { | |
105 | atomic_or(ENGINE_STATE_PENDING_ENABLE, &e->guc->state); | |
106 | } | |
107 | ||
108 | static void clear_engine_pending_enable(struct xe_engine *e) | |
109 | { | |
110 | atomic_and(~ENGINE_STATE_PENDING_ENABLE, &e->guc->state); | |
111 | } | |
112 | ||
113 | static bool engine_pending_disable(struct xe_engine *e) | |
114 | { | |
115 | return atomic_read(&e->guc->state) & ENGINE_STATE_PENDING_DISABLE; | |
116 | } | |
117 | ||
118 | static void set_engine_pending_disable(struct xe_engine *e) | |
119 | { | |
120 | atomic_or(ENGINE_STATE_PENDING_DISABLE, &e->guc->state); | |
121 | } | |
122 | ||
123 | static void clear_engine_pending_disable(struct xe_engine *e) | |
124 | { | |
125 | atomic_and(~ENGINE_STATE_PENDING_DISABLE, &e->guc->state); | |
126 | } | |
127 | ||
128 | static bool engine_destroyed(struct xe_engine *e) | |
129 | { | |
130 | return atomic_read(&e->guc->state) & ENGINE_STATE_DESTROYED; | |
131 | } | |
132 | ||
133 | static void set_engine_destroyed(struct xe_engine *e) | |
134 | { | |
135 | atomic_or(ENGINE_STATE_DESTROYED, &e->guc->state); | |
136 | } | |
137 | ||
138 | static bool engine_banned(struct xe_engine *e) | |
139 | { | |
140 | return (e->flags & ENGINE_FLAG_BANNED); | |
141 | } | |
142 | ||
143 | static void set_engine_banned(struct xe_engine *e) | |
144 | { | |
145 | e->flags |= ENGINE_FLAG_BANNED; | |
146 | } | |
147 | ||
148 | static bool engine_suspended(struct xe_engine *e) | |
149 | { | |
150 | return atomic_read(&e->guc->state) & ENGINE_STATE_SUSPENDED; | |
151 | } | |
152 | ||
153 | static void set_engine_suspended(struct xe_engine *e) | |
154 | { | |
155 | atomic_or(ENGINE_STATE_SUSPENDED, &e->guc->state); | |
156 | } | |
157 | ||
158 | static void clear_engine_suspended(struct xe_engine *e) | |
159 | { | |
160 | atomic_and(~ENGINE_STATE_SUSPENDED, &e->guc->state); | |
161 | } | |
162 | ||
163 | static bool engine_reset(struct xe_engine *e) | |
164 | { | |
165 | return atomic_read(&e->guc->state) & ENGINE_STATE_RESET; | |
166 | } | |
167 | ||
168 | static void set_engine_reset(struct xe_engine *e) | |
169 | { | |
170 | atomic_or(ENGINE_STATE_RESET, &e->guc->state); | |
171 | } | |
172 | ||
173 | static bool engine_killed(struct xe_engine *e) | |
174 | { | |
175 | return atomic_read(&e->guc->state) & ENGINE_STATE_KILLED; | |
176 | } | |
177 | ||
178 | static void set_engine_killed(struct xe_engine *e) | |
179 | { | |
180 | atomic_or(ENGINE_STATE_KILLED, &e->guc->state); | |
181 | } | |
182 | ||
183 | static bool engine_killed_or_banned(struct xe_engine *e) | |
184 | { | |
185 | return engine_killed(e) || engine_banned(e); | |
186 | } | |
187 | ||
188 | static void guc_submit_fini(struct drm_device *drm, void *arg) | |
189 | { | |
190 | struct xe_guc *guc = arg; | |
191 | ||
192 | xa_destroy(&guc->submission_state.engine_lookup); | |
193 | ida_destroy(&guc->submission_state.guc_ids); | |
194 | bitmap_free(guc->submission_state.guc_ids_bitmap); | |
195 | } | |
196 | ||
197 | #define GUC_ID_MAX 65535 | |
198 | #define GUC_ID_NUMBER_MLRC 4096 | |
199 | #define GUC_ID_NUMBER_SLRC (GUC_ID_MAX - GUC_ID_NUMBER_MLRC) | |
200 | #define GUC_ID_START_MLRC GUC_ID_NUMBER_SLRC | |
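/*
 * How the GuC ID space is carved up (worked out from the defines above):
 * GUC_ID_MAX = 65535 and GUC_ID_NUMBER_MLRC = 4096, so single-LRC engines
 * allocate from an IDA covering [0, 61439), while parallel (multi-LRC)
 * engines allocate a contiguous region of a 4096-bit bitmap and then have
 * GUC_ID_START_MLRC (61439) added, i.e. they end up in [61439, 65535).
 */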
201 | ||
202 | static const struct xe_engine_ops guc_engine_ops; | |
203 | ||
204 | static void primelockdep(struct xe_guc *guc) | |
205 | { | |
206 | if (!IS_ENABLED(CONFIG_LOCKDEP)) | |
207 | return; | |
208 | ||
209 | fs_reclaim_acquire(GFP_KERNEL); | |
210 | ||
211 | mutex_lock(&guc->submission_state.lock); | |
212 | might_lock(&guc->submission_state.suspend.lock); | |
213 | mutex_unlock(&guc->submission_state.lock); | |
214 | ||
215 | fs_reclaim_release(GFP_KERNEL); | |
216 | } | |
217 | ||
218 | int xe_guc_submit_init(struct xe_guc *guc) | |
219 | { | |
220 | struct xe_device *xe = guc_to_xe(guc); | |
221 | struct xe_gt *gt = guc_to_gt(guc); | |
222 | int err; | |
223 | ||
224 | guc->submission_state.guc_ids_bitmap = | |
225 | bitmap_zalloc(GUC_ID_NUMBER_MLRC, GFP_KERNEL); | |
226 | if (!guc->submission_state.guc_ids_bitmap) | |
227 | return -ENOMEM; | |
228 | ||
229 | gt->engine_ops = &guc_engine_ops; | |
230 | ||
231 | mutex_init(&guc->submission_state.lock); | |
232 | xa_init(&guc->submission_state.engine_lookup); | |
233 | ida_init(&guc->submission_state.guc_ids); | |
234 | ||
235 | spin_lock_init(&guc->submission_state.suspend.lock); | |
236 | guc->submission_state.suspend.context = dma_fence_context_alloc(1); | |
237 | ||
238 | primelockdep(guc); | |
239 | ||
240 | err = drmm_add_action_or_reset(&xe->drm, guc_submit_fini, guc); | |
241 | if (err) | |
242 | return err; | |
243 | ||
244 | return 0; | |
245 | } | |
246 | ||
247 | static int alloc_guc_id(struct xe_guc *guc, struct xe_engine *e) | |
248 | { | |
249 | int ret; | |
250 | void *ptr; | |
251 | ||
252 | /* | |
253 | * Must use GFP_NOWAIT as this lock is in the dma fence signalling path, | |
254 | * worst case the user gets -ENOMEM on engine create and has to try again. | 
255 | * | 
256 | * FIXME: Have the caller pre-alloc or post-alloc w/ GFP_KERNEL to prevent | 
257 | * failure. | |
258 | */ | |
259 | lockdep_assert_held(&guc->submission_state.lock); | |
260 | ||
261 | if (xe_engine_is_parallel(e)) { | |
262 | void *bitmap = guc->submission_state.guc_ids_bitmap; | |
263 | ||
264 | ret = bitmap_find_free_region(bitmap, GUC_ID_NUMBER_MLRC, | |
265 | order_base_2(e->width)); | |
266 | } else { | |
267 | ret = ida_simple_get(&guc->submission_state.guc_ids, 0, | |
268 | GUC_ID_NUMBER_SLRC, GFP_NOWAIT); | |
269 | } | |
270 | if (ret < 0) | |
271 | return ret; | |
272 | ||
273 | e->guc->id = ret; | |
274 | if (xe_engine_is_parallel(e)) | |
275 | e->guc->id += GUC_ID_START_MLRC; | |
276 | ||
277 | ptr = xa_store(&guc->submission_state.engine_lookup, | |
278 | e->guc->id, e, GFP_NOWAIT); | |
279 | if (IS_ERR(ptr)) { | |
280 | ret = PTR_ERR(ptr); | |
281 | goto err_release; | |
282 | } | |
283 | ||
284 | return 0; | |
285 | ||
286 | err_release: | |
287 | ida_simple_remove(&guc->submission_state.guc_ids, e->guc->id); | |
288 | return ret; | |
289 | } | |
290 | ||
291 | static void release_guc_id(struct xe_guc *guc, struct xe_engine *e) | |
292 | { | |
293 | mutex_lock(&guc->submission_state.lock); | |
294 | xa_erase(&guc->submission_state.engine_lookup, e->guc->id); | |
295 | if (xe_engine_is_parallel(e)) | |
296 | bitmap_release_region(guc->submission_state.guc_ids_bitmap, | |
297 | e->guc->id - GUC_ID_START_MLRC, | |
298 | order_base_2(e->width)); | |
299 | else | |
300 | ida_simple_remove(&guc->submission_state.guc_ids, e->guc->id); | |
301 | mutex_unlock(&guc->submission_state.lock); | |
302 | } | |
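/*
 * Usage note (illustrative, mirroring guc_engine_init() below): alloc_guc_id()
 * asserts that the caller already holds guc->submission_state.lock, whereas
 * release_guc_id() takes and drops the lock itself, e.g.:
 *
 *	mutex_lock(&guc->submission_state.lock);
 *	err = alloc_guc_id(guc, e);
 *	mutex_unlock(&guc->submission_state.lock);
 *	...
 *	release_guc_id(guc, e);	/* locks internally */
 */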
303 | ||
304 | struct engine_policy { | |
305 | u32 count; | |
306 | struct guc_update_engine_policy h2g; | |
307 | }; | |
308 | ||
309 | static u32 __guc_engine_policy_action_size(struct engine_policy *policy) | |
310 | { | |
311 | size_t bytes = sizeof(policy->h2g.header) + | |
312 | (sizeof(policy->h2g.klv[0]) * policy->count); | |
313 | ||
314 | return bytes / sizeof(u32); | |
315 | } | |
316 | ||
317 | static void __guc_engine_policy_start_klv(struct engine_policy *policy, | |
318 | u16 guc_id) | |
319 | { | |
320 | policy->h2g.header.action = | |
321 | XE_GUC_ACTION_HOST2GUC_UPDATE_CONTEXT_POLICIES; | |
322 | policy->h2g.header.guc_id = guc_id; | |
323 | policy->count = 0; | |
324 | } | |
325 | ||
326 | #define MAKE_ENGINE_POLICY_ADD(func, id) \ | |
327 | static void __guc_engine_policy_add_##func(struct engine_policy *policy, \ | |
328 | u32 data) \ | |
329 | { \ | |
330 | XE_BUG_ON(policy->count >= GUC_CONTEXT_POLICIES_KLV_NUM_IDS); \ | |
331 | \ | |
332 | policy->h2g.klv[policy->count].kl = \ | |
333 | FIELD_PREP(GUC_KLV_0_KEY, \ | |
334 | GUC_CONTEXT_POLICIES_KLV_ID_##id) | \ | |
335 | FIELD_PREP(GUC_KLV_0_LEN, 1); \ | |
336 | policy->h2g.klv[policy->count].value = data; \ | |
337 | policy->count++; \ | |
338 | } | |
339 | ||
340 | MAKE_ENGINE_POLICY_ADD(execution_quantum, EXECUTION_QUANTUM) | |
341 | MAKE_ENGINE_POLICY_ADD(preemption_timeout, PREEMPTION_TIMEOUT) | |
342 | MAKE_ENGINE_POLICY_ADD(priority, SCHEDULING_PRIORITY) | |
343 | #undef MAKE_ENGINE_POLICY_ADD | |
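/*
 * Example of what the macro above generates (sketch, not extra code):
 * MAKE_ENGINE_POLICY_ADD(priority, SCHEDULING_PRIORITY) defines
 * __guc_engine_policy_add_priority(policy, data), which appends one KLV with
 * key GUC_CONTEXT_POLICIES_KLV_ID_SCHEDULING_PRIORITY, length 1 and value
 * 'data' to policy->h2g and bumps policy->count.
 */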
344 | ||
345 | static const int xe_engine_prio_to_guc[] = { | |
346 | [XE_ENGINE_PRIORITY_LOW] = GUC_CLIENT_PRIORITY_NORMAL, | |
347 | [XE_ENGINE_PRIORITY_NORMAL] = GUC_CLIENT_PRIORITY_KMD_NORMAL, | |
348 | [XE_ENGINE_PRIORITY_HIGH] = GUC_CLIENT_PRIORITY_HIGH, | |
349 | [XE_ENGINE_PRIORITY_KERNEL] = GUC_CLIENT_PRIORITY_KMD_HIGH, | |
350 | }; | |
351 | ||
352 | static void init_policies(struct xe_guc *guc, struct xe_engine *e) | |
353 | { | |
354 | struct engine_policy policy; | |
355 | enum xe_engine_priority prio = e->priority; | |
356 | u32 timeslice_us = e->sched_props.timeslice_us; | |
357 | u32 preempt_timeout_us = e->sched_props.preempt_timeout_us; | |
358 | ||
359 | XE_BUG_ON(!engine_registered(e)); | |
360 | ||
361 | __guc_engine_policy_start_klv(&policy, e->guc->id); | |
362 | __guc_engine_policy_add_priority(&policy, xe_engine_prio_to_guc[prio]); | |
363 | __guc_engine_policy_add_execution_quantum(&policy, timeslice_us); | |
364 | __guc_engine_policy_add_preemption_timeout(&policy, preempt_timeout_us); | |
365 | ||
366 | xe_guc_ct_send(&guc->ct, (u32 *)&policy.h2g, | |
367 | __guc_engine_policy_action_size(&policy), 0, 0); | |
368 | } | |
369 | ||
370 | static void set_min_preemption_timeout(struct xe_guc *guc, struct xe_engine *e) | |
371 | { | |
372 | struct engine_policy policy; | |
373 | ||
374 | __guc_engine_policy_start_klv(&policy, e->guc->id); | |
375 | __guc_engine_policy_add_preemption_timeout(&policy, 1); | |
376 | ||
377 | xe_guc_ct_send(&guc->ct, (u32 *)&policy.h2g, | |
378 | __guc_engine_policy_action_size(&policy), 0, 0); | |
379 | } | |
380 | ||
381 | #define PARALLEL_SCRATCH_SIZE 2048 | |
382 | #define WQ_SIZE (PARALLEL_SCRATCH_SIZE / 2) | |
383 | #define WQ_OFFSET (PARALLEL_SCRATCH_SIZE - WQ_SIZE) | |
384 | #define CACHELINE_BYTES 64 | |
385 | ||
386 | struct sync_semaphore { | |
387 | u32 semaphore; | |
388 | u8 unused[CACHELINE_BYTES - sizeof(u32)]; | |
389 | }; | |
390 | ||
391 | struct parallel_scratch { | |
392 | struct guc_sched_wq_desc wq_desc; | |
393 | ||
394 | struct sync_semaphore go; | |
395 | struct sync_semaphore join[XE_HW_ENGINE_MAX_INSTANCE]; | |
396 | ||
397 | u8 unused[WQ_OFFSET - sizeof(struct guc_sched_wq_desc) - | |
398 | sizeof(struct sync_semaphore) * (XE_HW_ENGINE_MAX_INSTANCE + 1)]; | |
399 | ||
400 | u32 wq[WQ_SIZE / sizeof(u32)]; | |
401 | }; | |
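/*
 * Layout of struct parallel_scratch in the 2K scratch area (derived from the
 * defines above): the first WQ_OFFSET (1024) bytes hold the GuC work queue
 * descriptor, the 'go' semaphore, one 'join' semaphore per possible instance
 * (each padded out to a 64 byte cacheline) plus pad bytes; the remaining
 * WQ_SIZE (1024) bytes are the work queue ring itself.
 */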
402 | ||
403 | #define parallel_read(xe_, map_, field_) \ | |
404 | xe_map_rd_field(xe_, &map_, 0, struct parallel_scratch, field_) | |
405 | #define parallel_write(xe_, map_, field_, val_) \ | |
406 | xe_map_wr_field(xe_, &map_, 0, struct parallel_scratch, field_, val_) | |
407 | ||
408 | static void __register_mlrc_engine(struct xe_guc *guc, | |
409 | struct xe_engine *e, | |
410 | struct guc_ctxt_registration_info *info) | |
411 | { | |
412 | #define MAX_MLRC_REG_SIZE (13 + XE_HW_ENGINE_MAX_INSTANCE * 2) | |
413 | u32 action[MAX_MLRC_REG_SIZE]; | |
414 | int len = 0; | |
415 | int i; | |
416 | ||
417 | XE_BUG_ON(!xe_engine_is_parallel(e)); | |
418 | ||
419 | action[len++] = XE_GUC_ACTION_REGISTER_CONTEXT_MULTI_LRC; | |
420 | action[len++] = info->flags; | |
421 | action[len++] = info->context_idx; | |
422 | action[len++] = info->engine_class; | |
423 | action[len++] = info->engine_submit_mask; | |
424 | action[len++] = info->wq_desc_lo; | |
425 | action[len++] = info->wq_desc_hi; | |
426 | action[len++] = info->wq_base_lo; | |
427 | action[len++] = info->wq_base_hi; | |
428 | action[len++] = info->wq_size; | |
429 | action[len++] = e->width; | |
430 | action[len++] = info->hwlrca_lo; | |
431 | action[len++] = info->hwlrca_hi; | |
432 | ||
433 | for (i = 1; i < e->width; ++i) { | |
434 | struct xe_lrc *lrc = e->lrc + i; | |
435 | ||
436 | action[len++] = lower_32_bits(xe_lrc_descriptor(lrc)); | |
437 | action[len++] = upper_32_bits(xe_lrc_descriptor(lrc)); | |
438 | } | |
439 | ||
440 | XE_BUG_ON(len > MAX_MLRC_REG_SIZE); | |
441 | #undef MAX_MLRC_REG_SIZE | |
442 | ||
443 | xe_guc_ct_send(&guc->ct, action, len, 0, 0); | |
444 | } | |
445 | ||
446 | static void __register_engine(struct xe_guc *guc, | |
447 | struct guc_ctxt_registration_info *info) | |
448 | { | |
449 | u32 action[] = { | |
450 | XE_GUC_ACTION_REGISTER_CONTEXT, | |
451 | info->flags, | |
452 | info->context_idx, | |
453 | info->engine_class, | |
454 | info->engine_submit_mask, | |
455 | info->wq_desc_lo, | |
456 | info->wq_desc_hi, | |
457 | info->wq_base_lo, | |
458 | info->wq_base_hi, | |
459 | info->wq_size, | |
460 | info->hwlrca_lo, | |
461 | info->hwlrca_hi, | |
462 | }; | |
463 | ||
464 | xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action), 0, 0); | |
465 | } | |
466 | ||
467 | static void register_engine(struct xe_engine *e) | |
468 | { | |
469 | struct xe_guc *guc = engine_to_guc(e); | |
470 | struct xe_device *xe = guc_to_xe(guc); | |
471 | struct xe_lrc *lrc = e->lrc; | |
472 | struct guc_ctxt_registration_info info; | |
473 | ||
474 | XE_BUG_ON(engine_registered(e)); | |
475 | ||
476 | memset(&info, 0, sizeof(info)); | |
477 | info.context_idx = e->guc->id; | |
478 | info.engine_class = xe_engine_class_to_guc_class(e->class); | |
479 | info.engine_submit_mask = e->logical_mask; | |
480 | info.hwlrca_lo = lower_32_bits(xe_lrc_descriptor(lrc)); | |
481 | info.hwlrca_hi = upper_32_bits(xe_lrc_descriptor(lrc)); | |
482 | info.flags = CONTEXT_REGISTRATION_FLAG_KMD; | |
483 | ||
484 | if (xe_engine_is_parallel(e)) { | |
485 | u32 ggtt_addr = xe_lrc_parallel_ggtt_addr(lrc); | |
486 | struct iosys_map map = xe_lrc_parallel_map(lrc); | |
487 | ||
488 | info.wq_desc_lo = lower_32_bits(ggtt_addr + | |
489 | offsetof(struct parallel_scratch, wq_desc)); | |
490 | info.wq_desc_hi = upper_32_bits(ggtt_addr + | |
491 | offsetof(struct parallel_scratch, wq_desc)); | |
492 | info.wq_base_lo = lower_32_bits(ggtt_addr + | |
493 | offsetof(struct parallel_scratch, wq[0])); | |
494 | info.wq_base_hi = upper_32_bits(ggtt_addr + | |
495 | offsetof(struct parallel_scratch, wq[0])); | |
496 | info.wq_size = WQ_SIZE; | |
497 | ||
498 | e->guc->wqi_head = 0; | |
499 | e->guc->wqi_tail = 0; | |
500 | xe_map_memset(xe, &map, 0, 0, PARALLEL_SCRATCH_SIZE - WQ_SIZE); | |
501 | parallel_write(xe, map, wq_desc.wq_status, WQ_STATUS_ACTIVE); | |
502 | } | |
503 | ||
504 | set_engine_registered(e); | |
505 | trace_xe_engine_register(e); | |
506 | if (xe_engine_is_parallel(e)) | |
507 | __register_mlrc_engine(guc, e, &info); | |
508 | else | |
509 | __register_engine(guc, &info); | |
510 | init_policies(guc, e); | |
511 | } | |
512 | ||
513 | static u32 wq_space_until_wrap(struct xe_engine *e) | |
514 | { | |
515 | return (WQ_SIZE - e->guc->wqi_tail); | |
516 | } | |
517 | ||
518 | static int wq_wait_for_space(struct xe_engine *e, u32 wqi_size) | |
519 | { | |
520 | struct xe_guc *guc = engine_to_guc(e); | |
521 | struct xe_device *xe = guc_to_xe(guc); | |
522 | struct iosys_map map = xe_lrc_parallel_map(e->lrc); | |
523 | unsigned int sleep_period_ms = 1; | |
524 | ||
525 | #define AVAILABLE_SPACE \ | |
526 | CIRC_SPACE(e->guc->wqi_tail, e->guc->wqi_head, WQ_SIZE) | |
527 | if (wqi_size > AVAILABLE_SPACE) { | |
528 | try_again: | |
529 | e->guc->wqi_head = parallel_read(xe, map, wq_desc.head); | |
530 | if (wqi_size > AVAILABLE_SPACE) { | |
531 | if (sleep_period_ms == 1024) { | |
532 | xe_gt_reset_async(e->gt); | |
533 | return -ENODEV; | |
534 | } | |
535 | ||
536 | msleep(sleep_period_ms); | |
537 | sleep_period_ms <<= 1; | |
538 | goto try_again; | |
539 | } | |
540 | } | |
541 | #undef AVAILABLE_SPACE | |
542 | ||
543 | return 0; | |
544 | } | |
545 | ||
546 | static int wq_noop_append(struct xe_engine *e) | |
547 | { | |
548 | struct xe_guc *guc = engine_to_guc(e); | |
549 | struct xe_device *xe = guc_to_xe(guc); | |
550 | struct iosys_map map = xe_lrc_parallel_map(e->lrc); | |
551 | u32 len_dw = wq_space_until_wrap(e) / sizeof(u32) - 1; | |
552 | ||
553 | if (wq_wait_for_space(e, wq_space_until_wrap(e))) | |
554 | return -ENODEV; | |
555 | ||
556 | XE_BUG_ON(!FIELD_FIT(WQ_LEN_MASK, len_dw)); | |
557 | ||
558 | parallel_write(xe, map, wq[e->guc->wqi_tail / sizeof(u32)], | |
559 | FIELD_PREP(WQ_TYPE_MASK, WQ_TYPE_NOOP) | | |
560 | FIELD_PREP(WQ_LEN_MASK, len_dw)); | |
561 | e->guc->wqi_tail = 0; | |
562 | ||
563 | return 0; | |
564 | } | |
565 | ||
566 | static void wq_item_append(struct xe_engine *e) | |
567 | { | |
568 | struct xe_guc *guc = engine_to_guc(e); | |
569 | struct xe_device *xe = guc_to_xe(guc); | |
570 | struct iosys_map map = xe_lrc_parallel_map(e->lrc); | |
571 | u32 wqi[XE_HW_ENGINE_MAX_INSTANCE + 3]; | |
572 | u32 wqi_size = (e->width + 3) * sizeof(u32); | |
573 | u32 len_dw = (wqi_size / sizeof(u32)) - 1; | |
574 | int i = 0, j; | |
575 | ||
576 | if (wqi_size > wq_space_until_wrap(e)) { | |
577 | if (wq_noop_append(e)) | |
578 | return; | |
579 | } | |
580 | if (wq_wait_for_space(e, wqi_size)) | |
581 | return; | |
582 | ||
583 | wqi[i++] = FIELD_PREP(WQ_TYPE_MASK, WQ_TYPE_MULTI_LRC) | | |
584 | FIELD_PREP(WQ_LEN_MASK, len_dw); | |
585 | wqi[i++] = xe_lrc_descriptor(e->lrc); | |
586 | wqi[i++] = FIELD_PREP(WQ_GUC_ID_MASK, e->guc->id) | | |
587 | FIELD_PREP(WQ_RING_TAIL_MASK, e->lrc->ring.tail / sizeof(u64)); | |
588 | wqi[i++] = 0; | |
589 | for (j = 1; j < e->width; ++j) { | |
590 | struct xe_lrc *lrc = e->lrc + j; | |
591 | ||
592 | wqi[i++] = lrc->ring.tail / sizeof(u64); | |
593 | } | |
594 | ||
595 | XE_BUG_ON(i != wqi_size / sizeof(u32)); | |
596 | ||
597 | iosys_map_incr(&map, offsetof(struct parallel_scratch, | |
598 | wq[e->guc->wqi_tail / sizeof(u32)])); | |
599 | xe_map_memcpy_to(xe, &map, 0, wqi, wqi_size); | |
600 | e->guc->wqi_tail += wqi_size; | |
601 | XE_BUG_ON(e->guc->wqi_tail > WQ_SIZE); | |
602 | ||
603 | xe_device_wmb(xe); | |
604 | ||
605 | map = xe_lrc_parallel_map(e->lrc); | |
606 | parallel_write(xe, map, wq_desc.tail, e->guc->wqi_tail); | |
607 | } | |
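/*
 * Shape of the work queue item written by wq_item_append() above (sketch based
 * on the code, for an engine of width N):
 *
 *	dw0:      WQ_TYPE_MULTI_LRC header, WQ_LEN_MASK = N + 2
 *	dw1:      LRC descriptor of lrc[0]
 *	dw2:      guc_id | ring tail of lrc[0] (in qwords)
 *	dw3:      0
 *	dw4..:    ring tail of lrc[1..N-1] (in qwords)
 *
 * i.e. N + 3 dwords total, wrapping via a NOOP item when the remaining space
 * before the end of the ring is too small.
 */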
608 | ||
609 | #define RESUME_PENDING ~0x0ull | |
610 | static void submit_engine(struct xe_engine *e) | |
611 | { | |
612 | struct xe_guc *guc = engine_to_guc(e); | |
613 | struct xe_lrc *lrc = e->lrc; | |
614 | u32 action[3]; | |
615 | u32 g2h_len = 0; | |
616 | u32 num_g2h = 0; | |
617 | int len = 0; | |
618 | bool extra_submit = false; | |
619 | ||
620 | XE_BUG_ON(!engine_registered(e)); | |
621 | ||
622 | if (xe_engine_is_parallel(e)) | |
623 | wq_item_append(e); | |
624 | else | |
625 | xe_lrc_write_ctx_reg(lrc, CTX_RING_TAIL, lrc->ring.tail); | |
626 | ||
627 | if (engine_suspended(e) && !xe_engine_is_parallel(e)) | |
628 | return; | |
629 | ||
630 | if (!engine_enabled(e) && !engine_suspended(e)) { | |
631 | action[len++] = XE_GUC_ACTION_SCHED_CONTEXT_MODE_SET; | |
632 | action[len++] = e->guc->id; | |
633 | action[len++] = GUC_CONTEXT_ENABLE; | |
634 | g2h_len = G2H_LEN_DW_SCHED_CONTEXT_MODE_SET; | |
635 | num_g2h = 1; | |
636 | if (xe_engine_is_parallel(e)) | |
637 | extra_submit = true; | |
638 | ||
639 | e->guc->resume_time = RESUME_PENDING; | |
640 | set_engine_pending_enable(e); | |
641 | set_engine_enabled(e); | |
642 | trace_xe_engine_scheduling_enable(e); | |
643 | } else { | |
644 | action[len++] = XE_GUC_ACTION_SCHED_CONTEXT; | |
645 | action[len++] = e->guc->id; | |
646 | trace_xe_engine_submit(e); | |
647 | } | |
648 | ||
649 | xe_guc_ct_send(&guc->ct, action, len, g2h_len, num_g2h); | |
650 | ||
651 | if (extra_submit) { | |
652 | len = 0; | |
653 | action[len++] = XE_GUC_ACTION_SCHED_CONTEXT; | |
654 | action[len++] = e->guc->id; | |
655 | trace_xe_engine_submit(e); | |
656 | ||
657 | xe_guc_ct_send(&guc->ct, action, len, 0, 0); | |
658 | } | |
659 | } | |
660 | ||
661 | static struct dma_fence * | |
662 | guc_engine_run_job(struct drm_sched_job *drm_job) | |
663 | { | |
664 | struct xe_sched_job *job = to_xe_sched_job(drm_job); | |
665 | struct xe_engine *e = job->engine; | |
666 | ||
667 | XE_BUG_ON((engine_destroyed(e) || engine_pending_disable(e)) && | |
668 | !engine_banned(e) && !engine_suspended(e)); | |
669 | ||
670 | trace_xe_sched_job_run(job); | |
671 | ||
672 | if (!engine_killed_or_banned(e) && !xe_sched_job_is_error(job)) { | |
673 | if (!engine_registered(e)) | |
674 | register_engine(e); | |
675 | e->ring_ops->emit_job(job); | |
676 | submit_engine(e); | |
677 | } | |
678 | ||
679 | if (test_and_set_bit(JOB_FLAG_SUBMIT, &job->fence->flags)) | |
680 | return job->fence; | |
681 | else | |
682 | return dma_fence_get(job->fence); | |
683 | } | |
684 | ||
685 | static void guc_engine_free_job(struct drm_sched_job *drm_job) | |
686 | { | |
687 | struct xe_sched_job *job = to_xe_sched_job(drm_job); | |
688 | ||
689 | trace_xe_sched_job_free(job); | |
690 | xe_sched_job_put(job); | |
691 | } | |
692 | ||
693 | static int guc_read_stopped(struct xe_guc *guc) | |
694 | { | |
695 | return atomic_read(&guc->submission_state.stopped); | |
696 | } | |
697 | ||
698 | #define MAKE_SCHED_CONTEXT_ACTION(e, enable_disable) \ | |
699 | u32 action[] = { \ | |
700 | XE_GUC_ACTION_SCHED_CONTEXT_MODE_SET, \ | |
701 | e->guc->id, \ | |
702 | GUC_CONTEXT_##enable_disable, \ | |
703 | } | |
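/*
 * For reference (illustrative expansion): MAKE_SCHED_CONTEXT_ACTION(e, DISABLE)
 * declares
 *
 *	u32 action[] = {
 *		XE_GUC_ACTION_SCHED_CONTEXT_MODE_SET,
 *		e->guc->id,
 *		GUC_CONTEXT_DISABLE,
 *	};
 *
 * which is the H2G payload used by the scheduling enable/disable paths below.
 */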
704 | ||
705 | static void disable_scheduling_deregister(struct xe_guc *guc, | |
706 | struct xe_engine *e) | |
707 | { | |
708 | MAKE_SCHED_CONTEXT_ACTION(e, DISABLE); | |
709 | int ret; | |
710 | ||
711 | set_min_preemption_timeout(guc, e); | |
712 | smp_rmb(); | |
713 | ret = wait_event_timeout(guc->ct.wq, !engine_pending_enable(e) || | |
714 | guc_read_stopped(guc), HZ * 5); | |
715 | if (!ret) { | |
716 | struct xe_gpu_scheduler *sched = &e->guc->sched; | |
717 | ||
718 | XE_WARN_ON("Pending enable failed to respond"); | |
719 | xe_sched_submission_start(sched); | |
720 | xe_gt_reset_async(e->gt); | |
721 | xe_sched_tdr_queue_imm(sched); | |
722 | return; | |
723 | } | |
724 | ||
725 | clear_engine_enabled(e); | |
726 | set_engine_pending_disable(e); | |
727 | set_engine_destroyed(e); | |
728 | trace_xe_engine_scheduling_disable(e); | |
729 | ||
730 | /* | |
731 | * Reserve space for both G2H here as the 2nd G2H is sent from a G2H | |
732 | * handler and we are not allowed to reserve G2H space in handlers. | 
733 | */ | |
734 | xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action), | |
735 | G2H_LEN_DW_SCHED_CONTEXT_MODE_SET + | |
736 | G2H_LEN_DW_DEREGISTER_CONTEXT, 2); | |
737 | } | |
738 | ||
739 | static void guc_engine_print(struct xe_engine *e, struct drm_printer *p); | |
740 | ||
741 | #if IS_ENABLED(CONFIG_DRM_XE_SIMPLE_ERROR_CAPTURE) | |
742 | static void simple_error_capture(struct xe_engine *e) | |
743 | { | |
744 | struct xe_guc *guc = engine_to_guc(e); | |
745 | struct drm_printer p = drm_err_printer(""); | |
746 | struct xe_hw_engine *hwe; | |
747 | enum xe_hw_engine_id id; | |
748 | u32 adj_logical_mask = e->logical_mask; | |
749 | u32 width_mask = (0x1 << e->width) - 1; | |
750 | int i; | |
751 | bool cookie; | |
752 | ||
753 | if (e->vm && !e->vm->error_capture.capture_once) { | |
754 | e->vm->error_capture.capture_once = true; | |
755 | cookie = dma_fence_begin_signalling(); | |
756 | for (i = 0; e->width > 1 && i < XE_HW_ENGINE_MAX_INSTANCE;) { | |
757 | if (adj_logical_mask & BIT(i)) { | |
758 | adj_logical_mask |= width_mask << i; | |
759 | i += e->width; | |
760 | } else { | |
761 | ++i; | |
762 | } | |
763 | } | |
764 | ||
765 | xe_force_wake_get(gt_to_fw(guc_to_gt(guc)), XE_FORCEWAKE_ALL); | |
766 | xe_guc_ct_print(&guc->ct, &p); | |
767 | guc_engine_print(e, &p); | |
768 | for_each_hw_engine(hwe, guc_to_gt(guc), id) { | |
769 | if (hwe->class != e->hwe->class || | |
770 | !(BIT(hwe->logical_instance) & adj_logical_mask)) | |
771 | continue; | |
772 | xe_hw_engine_print_state(hwe, &p); | |
773 | } | |
774 | xe_analyze_vm(&p, e->vm, e->gt->info.id); | |
775 | xe_force_wake_put(gt_to_fw(guc_to_gt(guc)), XE_FORCEWAKE_ALL); | |
776 | dma_fence_end_signalling(cookie); | |
777 | } | |
778 | } | |
779 | #else | |
780 | static void simple_error_capture(struct xe_engine *e) | |
781 | { | |
782 | } | |
783 | #endif | |
784 | ||
785 | static enum drm_gpu_sched_stat | |
786 | guc_engine_timedout_job(struct drm_sched_job *drm_job) | |
787 | { | |
788 | struct xe_sched_job *job = to_xe_sched_job(drm_job); | |
789 | struct xe_sched_job *tmp_job; | |
790 | struct xe_engine *e = job->engine; | |
791 | struct xe_gpu_scheduler *sched = &e->guc->sched; | |
792 | struct xe_device *xe = guc_to_xe(engine_to_guc(e)); | |
793 | int err = -ETIME; | |
794 | int i = 0; | |
795 | ||
796 | if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &job->fence->flags)) { | |
797 | XE_WARN_ON(e->flags & ENGINE_FLAG_KERNEL); | |
798 | XE_WARN_ON(e->flags & ENGINE_FLAG_VM && !engine_killed(e)); | |
799 | ||
800 | drm_notice(&xe->drm, "Timedout job: seqno=%u, guc_id=%d, flags=0x%lx", | |
801 | xe_sched_job_seqno(job), e->guc->id, e->flags); | |
802 | simple_error_capture(e); | |
803 | } else { | |
804 | drm_dbg(&xe->drm, "Timedout signaled job: seqno=%u, guc_id=%d, flags=0x%lx", | |
805 | xe_sched_job_seqno(job), e->guc->id, e->flags); | |
806 | } | |
807 | trace_xe_sched_job_timedout(job); | |
808 | ||
809 | /* Kill the run_job entry point */ | |
810 | xe_sched_submission_stop(sched); | |
811 | ||
812 | /* | |
813 | * Kernel jobs should never fail, nor should VM jobs. If they do, | 
814 | * something has gone wrong and the GT needs a reset. | 
815 | */ | |
816 | if (e->flags & ENGINE_FLAG_KERNEL || | |
817 | (e->flags & ENGINE_FLAG_VM && !engine_killed(e))) { | |
818 | if (!xe_sched_invalidate_job(job, 2)) { | |
819 | xe_sched_add_pending_job(sched, job); | |
820 | xe_sched_submission_start(sched); | |
821 | xe_gt_reset_async(e->gt); | |
822 | goto out; | |
823 | } | |
824 | } | |
825 | ||
826 | /* Engine state now stable, disable scheduling if needed */ | |
827 | if (engine_enabled(e)) { | |
828 | struct xe_guc *guc = engine_to_guc(e); | |
829 | int ret; | |
830 | ||
831 | if (engine_reset(e)) | |
832 | err = -EIO; | |
833 | set_engine_banned(e); | |
834 | xe_engine_get(e); | |
835 | disable_scheduling_deregister(engine_to_guc(e), e); | |
836 | ||
837 | /* | |
838 | * Must wait for scheduling to be disabled before signalling | |
839 | * any fences; if the GT is broken, the GT reset code should signal us. | 
840 | * | |
841 | * FIXME: Tests can generate a ton of 0x6000 (IOMMU CAT fault | |
842 | * error) messages which can cause the schedule disable to get | |
843 | * lost. If this occurs, trigger a GT reset to recover. | |
844 | */ | |
845 | smp_rmb(); | |
846 | ret = wait_event_timeout(guc->ct.wq, | |
847 | !engine_pending_disable(e) || | |
848 | guc_read_stopped(guc), HZ * 5); | |
849 | if (!ret) { | |
850 | XE_WARN_ON("Schedule disable failed to respond"); | |
851 | xe_sched_add_pending_job(sched, job); | |
852 | xe_sched_submission_start(sched); | |
853 | xe_gt_reset_async(e->gt); | |
854 | xe_sched_tdr_queue_imm(sched); | |
855 | goto out; | |
856 | } | |
857 | } | |
858 | ||
859 | /* Stop fence signaling */ | |
860 | xe_hw_fence_irq_stop(e->fence_irq); | |
861 | ||
862 | /* | |
863 | * Fence state now stable, stop / start scheduler which cleans up any | |
864 | * fences that are complete | |
865 | */ | |
866 | xe_sched_add_pending_job(sched, job); | |
867 | xe_sched_submission_start(sched); | |
868 | xe_sched_tdr_queue_imm(&e->guc->sched); | |
869 | ||
870 | /* Mark all outstanding jobs as bad, thus completing them */ | |
871 | spin_lock(&sched->base.job_list_lock); | |
872 | list_for_each_entry(tmp_job, &sched->base.pending_list, drm.list) | |
873 | xe_sched_job_set_error(tmp_job, !i++ ? err : -ECANCELED); | |
874 | spin_unlock(&sched->base.job_list_lock); | |
875 | ||
876 | /* Start fence signaling */ | |
877 | xe_hw_fence_irq_start(e->fence_irq); | |
878 | ||
879 | out: | |
880 | return DRM_GPU_SCHED_STAT_NOMINAL; | |
881 | } | |
882 | ||
883 | static void __guc_engine_fini_async(struct work_struct *w) | |
884 | { | |
885 | struct xe_guc_engine *ge = | |
886 | container_of(w, struct xe_guc_engine, fini_async); | |
887 | struct xe_engine *e = ge->engine; | |
888 | struct xe_guc *guc = engine_to_guc(e); | |
889 | ||
890 | trace_xe_engine_destroy(e); | |
891 | ||
892 | if (e->flags & ENGINE_FLAG_PERSISTENT) | |
893 | xe_device_remove_persitent_engines(gt_to_xe(e->gt), e); | |
894 | release_guc_id(guc, e); | |
895 | xe_sched_entity_fini(&ge->entity); | |
896 | xe_sched_fini(&ge->sched); | |
897 | ||
898 | if (!(e->flags & ENGINE_FLAG_KERNEL)) { | |
899 | kfree(ge); | |
900 | xe_engine_fini(e); | |
901 | } | |
902 | } | |
903 | ||
904 | static void guc_engine_fini_async(struct xe_engine *e) | |
905 | { | |
906 | bool kernel = e->flags & ENGINE_FLAG_KERNEL; | |
907 | ||
908 | INIT_WORK(&e->guc->fini_async, __guc_engine_fini_async); | |
909 | queue_work(system_unbound_wq, &e->guc->fini_async); | |
910 | ||
911 | /* We must block on kernel engines so slabs are empty on driver unload */ | |
912 | if (kernel) { | |
913 | struct xe_guc_engine *ge = e->guc; | |
914 | ||
915 | flush_work(&ge->fini_async); | |
916 | kfree(ge); | |
917 | xe_engine_fini(e); | |
918 | } | |
919 | } | |
920 | ||
921 | static void __guc_engine_fini(struct xe_guc *guc, struct xe_engine *e) | |
922 | { | |
923 | /* | |
924 | * Might be done from within the GPU scheduler, need to do async as we | |
925 | * fini the scheduler when the engine is fini'd, the scheduler can't | |
926 | * complete fini within itself (circular dependency). Async resolves | |
927 | * this and we don't really care when everything is fini'd, just that it | 
928 | * is. | |
929 | */ | |
930 | guc_engine_fini_async(e); | |
931 | } | |
932 | ||
933 | static void __guc_engine_process_msg_cleanup(struct xe_sched_msg *msg) | |
934 | { | |
935 | struct xe_engine *e = msg->private_data; | |
936 | struct xe_guc *guc = engine_to_guc(e); | |
937 | ||
938 | XE_BUG_ON(e->flags & ENGINE_FLAG_KERNEL); | |
939 | trace_xe_engine_cleanup_entity(e); | |
940 | ||
941 | if (engine_registered(e)) | |
942 | disable_scheduling_deregister(guc, e); | |
943 | else | |
944 | __guc_engine_fini(guc, e); | |
945 | } | |
946 | ||
947 | static bool guc_engine_allowed_to_change_state(struct xe_engine *e) | |
948 | { | |
949 | return !engine_killed_or_banned(e) && engine_registered(e); | |
950 | } | |
951 | ||
952 | static void __guc_engine_process_msg_set_sched_props(struct xe_sched_msg *msg) | |
953 | { | |
954 | struct xe_engine *e = msg->private_data; | |
955 | struct xe_guc *guc = engine_to_guc(e); | |
956 | ||
957 | if (guc_engine_allowed_to_change_state(e)) | |
958 | init_policies(guc, e); | |
959 | kfree(msg); | |
960 | } | |
961 | ||
962 | static void suspend_fence_signal(struct xe_engine *e) | |
963 | { | |
964 | struct xe_guc *guc = engine_to_guc(e); | |
965 | ||
966 | XE_BUG_ON(!engine_suspended(e) && !engine_killed(e) && | |
967 | !guc_read_stopped(guc)); | |
968 | XE_BUG_ON(!e->guc->suspend_pending); | |
969 | ||
970 | e->guc->suspend_pending = false; | |
971 | smp_wmb(); | |
972 | wake_up(&e->guc->suspend_wait); | |
973 | } | |
974 | ||
975 | static void __guc_engine_process_msg_suspend(struct xe_sched_msg *msg) | |
976 | { | |
977 | struct xe_engine *e = msg->private_data; | |
978 | struct xe_guc *guc = engine_to_guc(e); | |
979 | ||
980 | if (guc_engine_allowed_to_change_state(e) && !engine_suspended(e) && | |
981 | engine_enabled(e)) { | |
982 | wait_event(guc->ct.wq, e->guc->resume_time != RESUME_PENDING || | |
983 | guc_read_stopped(guc)); | |
984 | ||
985 | if (!guc_read_stopped(guc)) { | |
986 | MAKE_SCHED_CONTEXT_ACTION(e, DISABLE); | |
987 | s64 since_resume_ms = | |
988 | ktime_ms_delta(ktime_get(), | |
989 | e->guc->resume_time); | |
990 | s64 wait_ms = e->vm->preempt.min_run_period_ms - | |
991 | since_resume_ms; | |
992 | ||
993 | if (wait_ms > 0 && e->guc->resume_time) | |
994 | msleep(wait_ms); | |
995 | ||
996 | set_engine_suspended(e); | |
997 | clear_engine_enabled(e); | |
998 | set_engine_pending_disable(e); | |
999 | trace_xe_engine_scheduling_disable(e); | |
1000 | ||
1001 | xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action), | |
1002 | G2H_LEN_DW_SCHED_CONTEXT_MODE_SET, 1); | |
1003 | } | |
1004 | } else if (e->guc->suspend_pending) { | |
1005 | set_engine_suspended(e); | |
1006 | suspend_fence_signal(e); | |
1007 | } | |
1008 | } | |
1009 | ||
1010 | static void __guc_engine_process_msg_resume(struct xe_sched_msg *msg) | |
1011 | { | |
1012 | struct xe_engine *e = msg->private_data; | |
1013 | struct xe_guc *guc = engine_to_guc(e); | |
1014 | ||
1015 | if (guc_engine_allowed_to_change_state(e)) { | |
1016 | MAKE_SCHED_CONTEXT_ACTION(e, ENABLE); | |
1017 | ||
1018 | e->guc->resume_time = RESUME_PENDING; | |
1019 | clear_engine_suspended(e); | |
1020 | set_engine_pending_enable(e); | |
1021 | set_engine_enabled(e); | |
1022 | trace_xe_engine_scheduling_enable(e); | |
1023 | ||
1024 | xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action), | |
1025 | G2H_LEN_DW_SCHED_CONTEXT_MODE_SET, 1); | |
1026 | } else { | |
1027 | clear_engine_suspended(e); | |
1028 | } | |
1029 | } | |
1030 | ||
1031 | #define CLEANUP 1 /* Non-zero values to catch uninitialized msg */ | |
1032 | #define SET_SCHED_PROPS 2 | |
1033 | #define SUSPEND 3 | |
1034 | #define RESUME 4 | |
1035 | ||
1036 | static void guc_engine_process_msg(struct xe_sched_msg *msg) | |
1037 | { | |
1038 | trace_xe_sched_msg_recv(msg); | |
1039 | ||
1040 | switch (msg->opcode) { | |
1041 | case CLEANUP: | |
1042 | __guc_engine_process_msg_cleanup(msg); | |
1043 | break; | |
1044 | case SET_SCHED_PROPS: | |
1045 | __guc_engine_process_msg_set_sched_props(msg); | |
1046 | break; | |
1047 | case SUSPEND: | |
1048 | __guc_engine_process_msg_suspend(msg); | |
1049 | break; | |
1050 | case RESUME: | |
1051 | __guc_engine_process_msg_resume(msg); | |
1052 | break; | |
1053 | default: | |
1054 | XE_BUG_ON("Unknown message type"); | |
1055 | } | |
1056 | } | |
1057 | ||
1058 | static const struct drm_sched_backend_ops drm_sched_ops = { | |
1059 | .run_job = guc_engine_run_job, | |
1060 | .free_job = guc_engine_free_job, | |
1061 | .timedout_job = guc_engine_timedout_job, | |
1062 | }; | |
1063 | ||
1064 | static const struct xe_sched_backend_ops xe_sched_ops = { | |
1065 | .process_msg = guc_engine_process_msg, | |
1066 | }; | |
1067 | ||
1068 | static int guc_engine_init(struct xe_engine *e) | |
1069 | { | |
1070 | struct xe_gpu_scheduler *sched; | |
1071 | struct xe_guc *guc = engine_to_guc(e); | |
1072 | struct xe_guc_engine *ge; | |
1073 | long timeout; | |
1074 | int err; | |
1075 | ||
1076 | XE_BUG_ON(!xe_device_guc_submission_enabled(guc_to_xe(guc))); | |
1077 | ||
1078 | ge = kzalloc(sizeof(*ge), GFP_KERNEL); | |
1079 | if (!ge) | |
1080 | return -ENOMEM; | |
1081 | ||
1082 | e->guc = ge; | |
1083 | ge->engine = e; | |
1084 | init_waitqueue_head(&ge->suspend_wait); | |
1085 | ||
1086 | timeout = xe_vm_no_dma_fences(e->vm) ? MAX_SCHEDULE_TIMEOUT : HZ * 5; | |
1087 | err = xe_sched_init(&ge->sched, &drm_sched_ops, &xe_sched_ops, NULL, | |
1088 | e->lrc[0].ring.size / MAX_JOB_SIZE_BYTES, | |
1089 | 64, timeout, guc_to_gt(guc)->ordered_wq, NULL, | |
1090 | e->name, gt_to_xe(e->gt)->drm.dev); | |
1091 | if (err) | |
1092 | goto err_free; | |
1093 | ||
1094 | sched = &ge->sched; | |
1095 | err = xe_sched_entity_init(&ge->entity, sched); | |
1096 | if (err) | |
1097 | goto err_sched; | |
1098 | e->priority = XE_ENGINE_PRIORITY_NORMAL; | |
1099 | ||
1100 | mutex_lock(&guc->submission_state.lock); | |
1101 | ||
1102 | err = alloc_guc_id(guc, e); | |
1103 | if (err) | |
1104 | goto err_entity; | |
1105 | ||
1106 | e->entity = &ge->entity; | |
1107 | ||
1108 | if (guc_read_stopped(guc)) | |
1109 | xe_sched_stop(sched); | |
1110 | ||
1111 | mutex_unlock(&guc->submission_state.lock); | |
1112 | ||
1113 | switch (e->class) { | |
1114 | case XE_ENGINE_CLASS_RENDER: | |
1115 | sprintf(e->name, "rcs%d", e->guc->id); | |
1116 | break; | |
1117 | case XE_ENGINE_CLASS_VIDEO_DECODE: | |
1118 | sprintf(e->name, "vcs%d", e->guc->id); | |
1119 | break; | |
1120 | case XE_ENGINE_CLASS_VIDEO_ENHANCE: | |
1121 | sprintf(e->name, "vecs%d", e->guc->id); | |
1122 | break; | |
1123 | case XE_ENGINE_CLASS_COPY: | |
1124 | sprintf(e->name, "bcs%d", e->guc->id); | |
1125 | break; | |
1126 | case XE_ENGINE_CLASS_COMPUTE: | |
1127 | sprintf(e->name, "ccs%d", e->guc->id); | |
1128 | break; | |
1129 | default: | |
1130 | XE_WARN_ON(e->class); | |
1131 | } | |
1132 | ||
1133 | trace_xe_engine_create(e); | |
1134 | ||
1135 | return 0; | |
1136 | ||
1137 | err_entity: | |
1138 | xe_sched_entity_fini(&ge->entity); | |
1139 | err_sched: | |
1140 | xe_sched_fini(&ge->sched); | |
1141 | err_free: | |
1142 | kfree(ge); | |
1143 | ||
1144 | return err; | |
1145 | } | |
1146 | ||
1147 | static void guc_engine_kill(struct xe_engine *e) | |
1148 | { | |
1149 | trace_xe_engine_kill(e); | |
1150 | set_engine_killed(e); | |
1151 | xe_sched_tdr_queue_imm(&e->guc->sched); | |
1152 | } | |
1153 | ||
1154 | static void guc_engine_add_msg(struct xe_engine *e, struct xe_sched_msg *msg, | |
1155 | u32 opcode) | |
1156 | { | |
1157 | INIT_LIST_HEAD(&msg->link); | |
1158 | msg->opcode = opcode; | |
1159 | msg->private_data = e; | |
1160 | ||
1161 | trace_xe_sched_msg_add(msg); | |
1162 | xe_sched_add_msg(&e->guc->sched, msg); | |
1163 | } | |
1164 | ||
1165 | #define STATIC_MSG_CLEANUP 0 | |
1166 | #define STATIC_MSG_SUSPEND 1 | |
1167 | #define STATIC_MSG_RESUME 2 | |
1168 | static void guc_engine_fini(struct xe_engine *e) | |
1169 | { | |
1170 | struct xe_sched_msg *msg = e->guc->static_msgs + STATIC_MSG_CLEANUP; | |
1171 | ||
1172 | if (!(e->flags & ENGINE_FLAG_KERNEL)) | |
1173 | guc_engine_add_msg(e, msg, CLEANUP); | |
1174 | else | |
1175 | __guc_engine_fini(engine_to_guc(e), e); | |
1176 | } | |
1177 | ||
1178 | static int guc_engine_set_priority(struct xe_engine *e, | |
1179 | enum xe_engine_priority priority) | |
1180 | { | |
1181 | struct xe_sched_msg *msg; | |
1182 | ||
1183 | if (e->priority == priority || engine_killed_or_banned(e)) | |
1184 | return 0; | |
1185 | ||
1186 | msg = kmalloc(sizeof(*msg), GFP_KERNEL); | |
1187 | if (!msg) | |
1188 | return -ENOMEM; | |
1189 | ||
1190 | guc_engine_add_msg(e, msg, SET_SCHED_PROPS); | |
1191 | e->priority = priority; | |
1192 | ||
1193 | return 0; | |
1194 | } | |
1195 | ||
1196 | static int guc_engine_set_timeslice(struct xe_engine *e, u32 timeslice_us) | |
1197 | { | |
1198 | struct xe_sched_msg *msg; | |
1199 | ||
1200 | if (e->sched_props.timeslice_us == timeslice_us || | |
1201 | engine_killed_or_banned(e)) | |
1202 | return 0; | |
1203 | ||
1204 | msg = kmalloc(sizeof(*msg), GFP_KERNEL); | |
1205 | if (!msg) | |
1206 | return -ENOMEM; | |
1207 | ||
1208 | e->sched_props.timeslice_us = timeslice_us; | |
1209 | guc_engine_add_msg(e, msg, SET_SCHED_PROPS); | |
1210 | ||
1211 | return 0; | |
1212 | } | |
1213 | ||
1214 | static int guc_engine_set_preempt_timeout(struct xe_engine *e, | |
1215 | u32 preempt_timeout_us) | |
1216 | { | |
1217 | struct xe_sched_msg *msg; | |
1218 | ||
1219 | if (e->sched_props.preempt_timeout_us == preempt_timeout_us || | |
1220 | engine_killed_or_banned(e)) | |
1221 | return 0; | |
1222 | ||
1223 | msg = kmalloc(sizeof(*msg), GFP_KERNEL); | |
1224 | if (!msg) | |
1225 | return -ENOMEM; | |
1226 | ||
1227 | e->sched_props.preempt_timeout_us = preempt_timeout_us; | |
1228 | guc_engine_add_msg(e, msg, SET_SCHED_PROPS); | |
1229 | ||
1230 | return 0; | |
1231 | } | |
1232 | ||
1233 | static int guc_engine_set_job_timeout(struct xe_engine *e, u32 job_timeout_ms) | |
1234 | { | |
1235 | struct xe_gpu_scheduler *sched = &e->guc->sched; | |
1236 | ||
1237 | XE_BUG_ON(engine_registered(e)); | |
1238 | XE_BUG_ON(engine_banned(e)); | |
1239 | XE_BUG_ON(engine_killed(e)); | |
1240 | ||
1241 | sched->base.timeout = job_timeout_ms; | |
1242 | ||
1243 | return 0; | |
1244 | } | |
1245 | ||
1246 | static int guc_engine_suspend(struct xe_engine *e) | |
1247 | { | |
1248 | struct xe_sched_msg *msg = e->guc->static_msgs + STATIC_MSG_SUSPEND; | |
1249 | ||
1250 | if (engine_killed_or_banned(e) || e->guc->suspend_pending) | |
1251 | return -EINVAL; | |
1252 | ||
1253 | e->guc->suspend_pending = true; | |
1254 | guc_engine_add_msg(e, msg, SUSPEND); | |
1255 | ||
1256 | return 0; | |
1257 | } | |
1258 | ||
1259 | static void guc_engine_suspend_wait(struct xe_engine *e) | |
1260 | { | |
1261 | struct xe_guc *guc = engine_to_guc(e); | |
1262 | ||
1263 | wait_event(e->guc->suspend_wait, !e->guc->suspend_pending || | |
1264 | guc_read_stopped(guc)); | |
1265 | } | |
1266 | ||
1267 | static void guc_engine_resume(struct xe_engine *e) | |
1268 | { | |
1269 | struct xe_sched_msg *msg = e->guc->static_msgs + STATIC_MSG_RESUME; | |
1270 | ||
1271 | XE_BUG_ON(e->guc->suspend_pending); | |
1272 | ||
1273 | xe_mocs_init_engine(e); | |
1274 | guc_engine_add_msg(e, msg, RESUME); | |
1275 | } | |
1276 | ||
1277 | /* | |
1278 | * All of these functions are an abstraction layer which other parts of XE can | |
1279 | * use to trap into the GuC backend. All of these functions, aside from init, | |
1280 | * really shouldn't do much other than trap into the DRM scheduler which | |
1281 | * synchronizes these operations. | |
1282 | */ | |
1283 | static const struct xe_engine_ops guc_engine_ops = { | |
1284 | .init = guc_engine_init, | |
1285 | .kill = guc_engine_kill, | |
1286 | .fini = guc_engine_fini, | |
1287 | .set_priority = guc_engine_set_priority, | |
1288 | .set_timeslice = guc_engine_set_timeslice, | |
1289 | .set_preempt_timeout = guc_engine_set_preempt_timeout, | |
1290 | .set_job_timeout = guc_engine_set_job_timeout, | |
1291 | .suspend = guc_engine_suspend, | |
1292 | .suspend_wait = guc_engine_suspend_wait, | |
1293 | .resume = guc_engine_resume, | |
1294 | }; | |
1295 | ||
1296 | static void guc_engine_stop(struct xe_guc *guc, struct xe_engine *e) | |
1297 | { | |
1298 | struct xe_gpu_scheduler *sched = &e->guc->sched; | |
1299 | ||
1300 | /* Stop scheduling + flush any DRM scheduler operations */ | |
1301 | xe_sched_submission_stop(sched); | |
1302 | ||
1303 | /* Clean up lost G2H + reset engine state */ | |
1304 | if (engine_destroyed(e) && engine_registered(e)) { | |
1305 | if (engine_banned(e)) | |
1306 | xe_engine_put(e); | |
1307 | else | |
1308 | __guc_engine_fini(guc, e); | |
1309 | } | |
1310 | if (e->guc->suspend_pending) { | |
1311 | set_engine_suspended(e); | |
1312 | suspend_fence_signal(e); | |
1313 | } | |
1314 | atomic_and(ENGINE_STATE_DESTROYED | ENGINE_STATE_SUSPENDED, | |
1315 | &e->guc->state); | |
1316 | e->guc->resume_time = 0; | |
1317 | trace_xe_engine_stop(e); | |
1318 | ||
1319 | /* | |
1320 | * Ban any engine (aside from kernel and engines used for VM ops) with a | |
1321 | * started but not complete job or if a job has gone through a GT reset | |
1322 | * more than twice. | |
1323 | */ | |
1324 | if (!(e->flags & (ENGINE_FLAG_KERNEL | ENGINE_FLAG_VM))) { | |
1325 | struct xe_sched_job *job = xe_sched_first_pending_job(sched); | |
1326 | ||
1327 | if (job) { | |
1328 | if ((xe_sched_job_started(job) && | |
1329 | !xe_sched_job_completed(job)) || | |
1330 | xe_sched_invalidate_job(job, 2)) { | |
1331 | trace_xe_sched_job_ban(job); | |
1332 | xe_sched_tdr_queue_imm(&e->guc->sched); | |
1333 | set_engine_banned(e); | |
1334 | } | |
1335 | } | |
1336 | } | |
1337 | } | |
1338 | ||
1339 | int xe_guc_submit_reset_prepare(struct xe_guc *guc) | |
1340 | { | |
1341 | int ret; | |
1342 | ||
1343 | /* | |
1344 | * Using an atomic here rather than submission_state.lock as this | |
1345 | * function can be called while holding the CT lock (engine reset | |
1346 | * failure). submission_state.lock needs the CT lock to resubmit jobs. | |
1347 | * Atomic is not ideal, but it works to protect against a concurrent reset | 
1348 | * and to release any TDRs waiting on guc->submission_state.stopped. | 
1349 | */ | |
1350 | ret = atomic_fetch_or(1, &guc->submission_state.stopped); | |
1351 | smp_wmb(); | |
1352 | wake_up_all(&guc->ct.wq); | |
1353 | ||
1354 | return ret; | |
1355 | } | |
1356 | ||
1357 | void xe_guc_submit_reset_wait(struct xe_guc *guc) | |
1358 | { | |
1359 | wait_event(guc->ct.wq, !guc_read_stopped(guc)); | |
1360 | } | |
1361 | ||
1362 | int xe_guc_submit_stop(struct xe_guc *guc) | |
1363 | { | |
1364 | struct xe_engine *e; | |
1365 | unsigned long index; | |
1366 | ||
1367 | XE_BUG_ON(guc_read_stopped(guc) != 1); | |
1368 | ||
1369 | mutex_lock(&guc->submission_state.lock); | |
1370 | ||
1371 | xa_for_each(&guc->submission_state.engine_lookup, index, e) | |
1372 | guc_engine_stop(guc, e); | |
1373 | ||
1374 | mutex_unlock(&guc->submission_state.lock); | |
1375 | ||
1376 | /* | |
1377 | * No one can enter the backend at this point, aside from new engine | |
1378 | * creation which is protected by guc->submission_state.lock. | |
1379 | */ | |
1380 | ||
1381 | return 0; | |
1382 | } | |
1383 | ||
1384 | static void guc_engine_start(struct xe_engine *e) | |
1385 | { | |
1386 | struct xe_gpu_scheduler *sched = &e->guc->sched; | |
1387 | ||
1388 | if (!engine_killed_or_banned(e)) { | |
1389 | int i; | |
1390 | ||
1391 | trace_xe_engine_resubmit(e); | |
1392 | for (i = 0; i < e->width; ++i) | |
1393 | xe_lrc_set_ring_head(e->lrc + i, e->lrc[i].ring.tail); | |
1394 | xe_sched_resubmit_jobs(sched); | |
1395 | } | |
1396 | ||
1397 | xe_sched_submission_start(sched); | |
1398 | } | |
1399 | ||
1400 | int xe_guc_submit_start(struct xe_guc *guc) | |
1401 | { | |
1402 | struct xe_engine *e; | |
1403 | unsigned long index; | |
1404 | ||
1405 | XE_BUG_ON(guc_read_stopped(guc) != 1); | |
1406 | ||
1407 | mutex_lock(&guc->submission_state.lock); | |
1408 | atomic_dec(&guc->submission_state.stopped); | |
1409 | xa_for_each(&guc->submission_state.engine_lookup, index, e) | |
1410 | guc_engine_start(e); | |
1411 | mutex_unlock(&guc->submission_state.lock); | |
1412 | ||
1413 | wake_up_all(&guc->ct.wq); | |
1414 | ||
1415 | return 0; | |
1416 | } | |
1417 | ||
1418 | static struct xe_engine * | |
1419 | g2h_engine_lookup(struct xe_guc *guc, u32 guc_id) | |
1420 | { | |
1421 | struct xe_device *xe = guc_to_xe(guc); | |
1422 | struct xe_engine *e; | |
1423 | ||
1424 | if (unlikely(guc_id >= GUC_ID_MAX)) { | |
1425 | drm_err(&xe->drm, "Invalid guc_id %u", guc_id); | |
1426 | return NULL; | |
1427 | } | |
1428 | ||
1429 | e = xa_load(&guc->submission_state.engine_lookup, guc_id); | |
1430 | if (unlikely(!e)) { | |
1431 | drm_err(&xe->drm, "Not engine present for guc_id %u", guc_id); | |
1432 | return NULL; | |
1433 | } | |
1434 | ||
1435 | XE_BUG_ON(e->guc->id != guc_id); | |
1436 | ||
1437 | return e; | |
1438 | } | |
1439 | ||
1440 | static void deregister_engine(struct xe_guc *guc, struct xe_engine *e) | |
1441 | { | |
1442 | u32 action[] = { | |
1443 | XE_GUC_ACTION_DEREGISTER_CONTEXT, | |
1444 | e->guc->id, | |
1445 | }; | |
1446 | ||
1447 | trace_xe_engine_deregister(e); | |
1448 | ||
1449 | xe_guc_ct_send_g2h_handler(&guc->ct, action, ARRAY_SIZE(action)); | |
1450 | } | |
1451 | ||
1452 | int xe_guc_sched_done_handler(struct xe_guc *guc, u32 *msg, u32 len) | |
1453 | { | |
1454 | struct xe_device *xe = guc_to_xe(guc); | |
1455 | struct xe_engine *e; | |
1456 | u32 guc_id = msg[0]; | |
1457 | ||
1458 | if (unlikely(len < 2)) { | |
1459 | drm_err(&xe->drm, "Invalid length %u", len); | |
1460 | return -EPROTO; | |
1461 | } | |
1462 | ||
1463 | e = g2h_engine_lookup(guc, guc_id); | |
1464 | if (unlikely(!e)) | |
1465 | return -EPROTO; | |
1466 | ||
1467 | if (unlikely(!engine_pending_enable(e) && | |
1468 | !engine_pending_disable(e))) { | |
1469 | drm_err(&xe->drm, "Unexpected engine state 0x%04x", | |
1470 | atomic_read(&e->guc->state)); | |
1471 | return -EPROTO; | |
1472 | } | |
1473 | ||
1474 | trace_xe_engine_scheduling_done(e); | |
1475 | ||
1476 | if (engine_pending_enable(e)) { | |
1477 | e->guc->resume_time = ktime_get(); | |
1478 | clear_engine_pending_enable(e); | |
1479 | smp_wmb(); | |
1480 | wake_up_all(&guc->ct.wq); | |
1481 | } else { | |
1482 | clear_engine_pending_disable(e); | |
1483 | if (e->guc->suspend_pending) { | |
1484 | suspend_fence_signal(e); | |
1485 | } else { | |
1486 | if (engine_banned(e)) { | |
1487 | smp_wmb(); | |
1488 | wake_up_all(&guc->ct.wq); | |
1489 | } | |
1490 | deregister_engine(guc, e); | |
1491 | } | |
1492 | } | |
1493 | ||
1494 | return 0; | |
1495 | } | |
1496 | ||
1497 | int xe_guc_deregister_done_handler(struct xe_guc *guc, u32 *msg, u32 len) | |
1498 | { | |
1499 | struct xe_device *xe = guc_to_xe(guc); | |
1500 | struct xe_engine *e; | |
1501 | u32 guc_id = msg[0]; | |
1502 | ||
1503 | if (unlikely(len < 1)) { | |
1504 | drm_err(&xe->drm, "Invalid length %u", len); | |
1505 | return -EPROTO; | |
1506 | } | |
1507 | ||
1508 | e = g2h_engine_lookup(guc, guc_id); | |
1509 | if (unlikely(!e)) | |
1510 | return -EPROTO; | |
1511 | ||
1512 | if (!engine_destroyed(e) || engine_pending_disable(e) || | |
1513 | engine_pending_enable(e) || engine_enabled(e)) { | |
1514 | drm_err(&xe->drm, "Unexpected engine state 0x%04x", | |
1515 | atomic_read(&e->guc->state)); | |
1516 | return -EPROTO; | |
1517 | } | |
1518 | ||
1519 | trace_xe_engine_deregister_done(e); | |
1520 | ||
1521 | clear_engine_registered(e); | |
1522 | if (engine_banned(e)) | |
1523 | xe_engine_put(e); | |
1524 | else | |
1525 | __guc_engine_fini(guc, e); | |
1526 | ||
1527 | return 0; | |
1528 | } | |
1529 | ||
1530 | int xe_guc_engine_reset_handler(struct xe_guc *guc, u32 *msg, u32 len) | |
1531 | { | |
1532 | struct xe_device *xe = guc_to_xe(guc); | |
1533 | struct xe_engine *e; | |
1534 | u32 guc_id = msg[0]; | |
1535 | ||
1536 | if (unlikely(len < 1)) { | |
1537 | drm_err(&xe->drm, "Invalid length %u", len); | |
1538 | return -EPROTO; | |
1539 | } | |
1540 | ||
1541 | e = g2h_engine_lookup(guc, guc_id); | |
1542 | if (unlikely(!e)) | |
1543 | return -EPROTO; | |
1544 | ||
1545 | drm_info(&xe->drm, "Engine reset: guc_id=%d", guc_id); | |
1546 | ||
1547 | /* FIXME: Do error capture, most likely async */ | |
1548 | ||
1549 | trace_xe_engine_reset(e); | |
1550 | ||
1551 | /* | |
1552 | * A banned engine is a NOP at this point (came from | |
1553 | * guc_engine_timedout_job). Otherwise, kick the drm scheduler to cancel | 
1554 | * jobs by setting the job's timeout to the minimum value, which kicks | 
1555 | * guc_engine_timedout_job. | 
1556 | */ | |
1557 | set_engine_reset(e); | |
1558 | if (!engine_banned(e)) | |
1559 | xe_sched_tdr_queue_imm(&e->guc->sched); | |
1560 | ||
1561 | return 0; | |
1562 | } | |
1563 | ||
1564 | int xe_guc_engine_memory_cat_error_handler(struct xe_guc *guc, u32 *msg, | |
1565 | u32 len) | |
1566 | { | |
1567 | struct xe_device *xe = guc_to_xe(guc); | |
1568 | struct xe_engine *e; | |
1569 | u32 guc_id = msg[0]; | |
1570 | ||
1571 | if (unlikely(len < 1)) { | |
1572 | drm_err(&xe->drm, "Invalid length %u", len); | |
1573 | return -EPROTO; | |
1574 | } | |
1575 | ||
1576 | e = g2h_engine_lookup(guc, guc_id); | |
1577 | if (unlikely(!e)) | |
1578 | return -EPROTO; | |
1579 | ||
1580 | drm_warn(&xe->drm, "Engine memory cat error: guc_id=%d", guc_id); | |
1581 | trace_xe_engine_memory_cat_error(e); | |
1582 | ||
1583 | /* Treat the same as engine reset */ | |
1584 | set_engine_reset(e); | |
1585 | if (!engine_banned(e)) | |
1586 | xe_sched_tdr_queue_imm(&e->guc->sched); | |
1587 | ||
1588 | return 0; | |
1589 | } | |
1590 | ||
1591 | int xe_guc_engine_reset_failure_handler(struct xe_guc *guc, u32 *msg, u32 len) | |
1592 | { | |
1593 | struct xe_device *xe = guc_to_xe(guc); | |
1594 | u8 guc_class, instance; | |
1595 | u32 reason; | |
1596 | ||
1597 | if (unlikely(len != 3)) { | |
1598 | drm_err(&xe->drm, "Invalid length %u", len); | |
1599 | return -EPROTO; | |
1600 | } | |
1601 | ||
1602 | guc_class = msg[0]; | |
1603 | instance = msg[1]; | |
1604 | reason = msg[2]; | |
1605 | ||
1606 | /* Unexpected failure of a hardware feature, log an actual error */ | |
1607 | drm_err(&xe->drm, "GuC engine reset request failed on %d:%d because 0x%08X", | |
1608 | guc_class, instance, reason); | |
1609 | ||
1610 | xe_gt_reset_async(guc_to_gt(guc)); | |
1611 | ||
1612 | return 0; | |
1613 | } | |
1614 | ||
1615 | static void guc_engine_wq_print(struct xe_engine *e, struct drm_printer *p) | |
1616 | { | |
1617 | struct xe_guc *guc = engine_to_guc(e); | |
1618 | struct xe_device *xe = guc_to_xe(guc); | |
1619 | struct iosys_map map = xe_lrc_parallel_map(e->lrc); | |
1620 | int i; | |
1621 | ||
1622 | drm_printf(p, "\tWQ head: %u (internal), %d (memory)\n", | |
1623 | e->guc->wqi_head, parallel_read(xe, map, wq_desc.head)); | |
1624 | drm_printf(p, "\tWQ tail: %u (internal), %d (memory)\n", | |
1625 | e->guc->wqi_tail, parallel_read(xe, map, wq_desc.tail)); | |
1626 | drm_printf(p, "\tWQ status: %u\n", | |
1627 | parallel_read(xe, map, wq_desc.wq_status)); | |
1628 | if (parallel_read(xe, map, wq_desc.head) != | |
1629 | parallel_read(xe, map, wq_desc.tail)) { | |
1630 | for (i = parallel_read(xe, map, wq_desc.head); | |
1631 | i != parallel_read(xe, map, wq_desc.tail); | |
1632 | i = (i + sizeof(u32)) % WQ_SIZE) | |
1633 | drm_printf(p, "\tWQ[%zu]: 0x%08x\n", i / sizeof(u32), | 
1634 | parallel_read(xe, map, wq[i / sizeof(u32)])); | 
1635 | } | |
1636 | } | |
1637 | ||
1638 | static void guc_engine_print(struct xe_engine *e, struct drm_printer *p) | |
1639 | { | |
1640 | struct xe_gpu_scheduler *sched = &e->guc->sched; | |
1641 | struct xe_sched_job *job; | |
1642 | int i; | |
1643 | ||
1644 | drm_printf(p, "\nGuC ID: %d\n", e->guc->id); | |
1645 | drm_printf(p, "\tName: %s\n", e->name); | |
1646 | drm_printf(p, "\tClass: %d\n", e->class); | |
1647 | drm_printf(p, "\tLogical mask: 0x%x\n", e->logical_mask); | |
1648 | drm_printf(p, "\tWidth: %d\n", e->width); | |
1649 | drm_printf(p, "\tRef: %d\n", kref_read(&e->refcount)); | |
1650 | drm_printf(p, "\tTimeout: %ld (ms)\n", sched->base.timeout); | |
1651 | drm_printf(p, "\tTimeslice: %u (us)\n", e->sched_props.timeslice_us); | |
1652 | drm_printf(p, "\tPreempt timeout: %u (us)\n", | |
1653 | e->sched_props.preempt_timeout_us); | |
1654 | for (i = 0; i < e->width; ++i) { | 
1655 | struct xe_lrc *lrc = e->lrc + i; | |
1656 | ||
1657 | drm_printf(p, "\tHW Context Desc: 0x%08x\n", | |
1658 | lower_32_bits(xe_lrc_ggtt_addr(lrc))); | |
1659 | drm_printf(p, "\tLRC Head: (memory) %u\n", | |
1660 | xe_lrc_ring_head(lrc)); | |
1661 | drm_printf(p, "\tLRC Tail: (internal) %u, (memory) %u\n", | |
1662 | lrc->ring.tail, | |
1663 | xe_lrc_read_ctx_reg(lrc, CTX_RING_TAIL)); | |
1664 | drm_printf(p, "\tStart seqno: (memory) %d\n", | |
1665 | xe_lrc_start_seqno(lrc)); | |
1666 | drm_printf(p, "\tSeqno: (memory) %d\n", xe_lrc_seqno(lrc)); | |
1667 | } | |
1668 | drm_printf(p, "\tSchedule State: 0x%x\n", atomic_read(&e->guc->state)); | |
1669 | drm_printf(p, "\tFlags: 0x%lx\n", e->flags); | |
1670 | if (xe_engine_is_parallel(e)) | |
1671 | guc_engine_wq_print(e, p); | |
1672 | ||
1673 | spin_lock(&sched->base.job_list_lock); | |
1674 | list_for_each_entry(job, &sched->base.pending_list, drm.list) | |
1675 | drm_printf(p, "\tJob: seqno=%d, fence=%d, finished=%d\n", | |
1676 | xe_sched_job_seqno(job), | |
1677 | dma_fence_is_signaled(job->fence) ? 1 : 0, | |
1678 | dma_fence_is_signaled(&job->drm.s_fence->finished) ? | |
1679 | 1 : 0); | |
1680 | spin_unlock(&sched->base.job_list_lock); | |
1681 | } | |
1682 | ||
1683 | void xe_guc_submit_print(struct xe_guc *guc, struct drm_printer *p) | |
1684 | { | |
1685 | struct xe_engine *e; | |
1686 | unsigned long index; | |
1687 | ||
1688 | if (!xe_device_guc_submission_enabled(guc_to_xe(guc))) | |
1689 | return; | |
1690 | ||
1691 | mutex_lock(&guc->submission_state.lock); | |
1692 | xa_for_each(&guc->submission_state.engine_lookup, index, e) | |
1693 | guc_engine_print(e, p); | |
1694 | mutex_unlock(&guc->submission_state.lock); | |
1695 | } |