Commit | Line | Data |
---|---|---|
0d1caff4 DCS |
1 | // SPDX-License-Identifier: MIT |
2 | /* | |
3 | * Copyright © 2023 Intel Corporation | |
4 | */ | |
5 | ||
6 | #include "xe_gsc.h" | |
7 | ||
dd0e89e5 DCS |
8 | #include <drm/drm_managed.h> |
9 | ||
a24d9099 DH |
10 | #include <generated/xe_wa_oob.h> |
11 | ||
0881cbe0 | 12 | #include "abi/gsc_mkhi_commands_abi.h" |
dd0e89e5 DCS |
13 | #include "xe_bb.h" |
14 | #include "xe_bo.h" | |
0d1caff4 | 15 | #include "xe_device.h" |
dd0e89e5 | 16 | #include "xe_exec_queue.h" |
997a55ca | 17 | #include "xe_gsc_proxy.h" |
0881cbe0 | 18 | #include "xe_gsc_submit.h" |
0d1caff4 DCS |
19 | #include "xe_gt.h" |
20 | #include "xe_gt_printk.h" | |
d8b15713 | 21 | #include "xe_huc.h" |
dd0e89e5 DCS |
22 | #include "xe_map.h" |
23 | #include "xe_mmio.h" | |
24 | #include "xe_sched_job.h" | |
0d1caff4 | 25 | #include "xe_uc_fw.h" |
aae84bf1 | 26 | #include "xe_wa.h" |
dd0e89e5 DCS |
27 | #include "instructions/xe_gsc_commands.h" |
28 | #include "regs/xe_gsc_regs.h" | |
0d1caff4 DCS |
29 | |
30 | static struct xe_gt * | |
31 | gsc_to_gt(struct xe_gsc *gsc) | |
32 | { | |
33 | return container_of(gsc, struct xe_gt, uc.gsc); | |
34 | } | |
35 | ||
dd0e89e5 DCS |
36 | static int memcpy_fw(struct xe_gsc *gsc) |
37 | { | |
38 | struct xe_gt *gt = gsc_to_gt(gsc); | |
39 | struct xe_device *xe = gt_to_xe(gt); | |
40 | u32 fw_size = gsc->fw.size; | |
41 | void *storage; | |
42 | ||
43 | /* | |
44 | * FIXME: xe_migrate_copy does not work with stolen mem yet, so we use | |
45 | * a memcpy for now. | |
46 | */ | |
47 | storage = kmalloc(fw_size, GFP_KERNEL); | |
48 | if (!storage) | |
49 | return -ENOMEM; | |
50 | ||
51 | xe_map_memcpy_from(xe, storage, &gsc->fw.bo->vmap, 0, fw_size); | |
52 | xe_map_memcpy_to(xe, &gsc->private->vmap, 0, storage, fw_size); | |
53 | xe_map_memset(xe, &gsc->private->vmap, fw_size, 0, gsc->private->size - fw_size); | |
54 | ||
55 | kfree(storage); | |
56 | ||
57 | return 0; | |
58 | } | |
59 | ||
60 | static int emit_gsc_upload(struct xe_gsc *gsc) | |
61 | { | |
62 | struct xe_gt *gt = gsc_to_gt(gsc); | |
63 | u64 offset = xe_bo_ggtt_addr(gsc->private); | |
64 | struct xe_bb *bb; | |
65 | struct xe_sched_job *job; | |
66 | struct dma_fence *fence; | |
67 | long timeout; | |
68 | ||
69 | bb = xe_bb_new(gt, 4, false); | |
70 | if (IS_ERR(bb)) | |
71 | return PTR_ERR(bb); | |
72 | ||
73 | bb->cs[bb->len++] = GSC_FW_LOAD; | |
74 | bb->cs[bb->len++] = lower_32_bits(offset); | |
75 | bb->cs[bb->len++] = upper_32_bits(offset); | |
76 | bb->cs[bb->len++] = (gsc->private->size / SZ_4K) | GSC_FW_LOAD_LIMIT_VALID; | |
77 | ||
78 | job = xe_bb_create_job(gsc->q, bb); | |
79 | if (IS_ERR(job)) { | |
80 | xe_bb_free(bb, NULL); | |
81 | return PTR_ERR(job); | |
82 | } | |
83 | ||
84 | xe_sched_job_arm(job); | |
85 | fence = dma_fence_get(&job->drm.s_fence->finished); | |
86 | xe_sched_job_push(job); | |
87 | ||
88 | timeout = dma_fence_wait_timeout(fence, false, HZ); | |
89 | dma_fence_put(fence); | |
90 | xe_bb_free(bb, NULL); | |
91 | if (timeout < 0) | |
92 | return timeout; | |
93 | else if (!timeout) | |
94 | return -ETIME; | |
95 | ||
96 | return 0; | |
97 | } | |
98 | ||
/*
 * Accessors for fields of the compatibility-version MKHI messages at a given
 * offset inside an iosys_map (input struct for writes, output for reads).
 */
#define version_query_wr(xe_, map_, offset_, field_, val_) \
	xe_map_wr_field(xe_, map_, offset_, struct gsc_get_compatibility_version_in, field_, val_)
#define version_query_rd(xe_, map_, offset_, field_) \
	xe_map_rd_field(xe_, map_, offset_, struct gsc_get_compatibility_version_out, field_)

/*
 * Emit a GET_HOST_COMPATIBILITY_VERSION MKHI request at @wr_offset in @map.
 * Returns the offset immediately past the emitted message.
 */
static u32 emit_version_query_msg(struct xe_device *xe, struct iosys_map *map, u32 wr_offset)
{
	xe_map_memset(xe, map, wr_offset, 0, sizeof(struct gsc_get_compatibility_version_in));

	version_query_wr(xe, map, wr_offset, header.group_id, MKHI_GROUP_ID_GFX_SRV);
	version_query_wr(xe, map, wr_offset, header.command,
			 MKHI_GFX_SRV_GET_HOST_COMPATIBILITY_VERSION);

	return wr_offset + sizeof(struct gsc_get_compatibility_version_in);
}
114 | ||
115 | #define GSC_VER_PKT_SZ SZ_4K /* 4K each for input and output */ | |
116 | static int query_compatibility_version(struct xe_gsc *gsc) | |
117 | { | |
118 | struct xe_uc_fw_version *compat = &gsc->fw.versions.found[XE_UC_FW_VER_COMPATIBILITY]; | |
119 | struct xe_gt *gt = gsc_to_gt(gsc); | |
120 | struct xe_tile *tile = gt_to_tile(gt); | |
121 | struct xe_device *xe = gt_to_xe(gt); | |
122 | struct xe_bo *bo; | |
123 | u32 wr_offset; | |
124 | u32 rd_offset; | |
125 | u64 ggtt_offset; | |
126 | int err; | |
127 | ||
128 | bo = xe_bo_create_pin_map(xe, tile, NULL, GSC_VER_PKT_SZ * 2, | |
129 | ttm_bo_type_kernel, | |
130 | XE_BO_CREATE_SYSTEM_BIT | | |
131 | XE_BO_CREATE_GGTT_BIT); | |
132 | if (IS_ERR(bo)) { | |
133 | xe_gt_err(gt, "failed to allocate bo for GSC version query\n"); | |
134 | return PTR_ERR(bo); | |
135 | } | |
136 | ||
137 | ggtt_offset = xe_bo_ggtt_addr(bo); | |
138 | ||
139 | wr_offset = xe_gsc_emit_header(xe, &bo->vmap, 0, HECI_MEADDRESS_MKHI, 0, | |
140 | sizeof(struct gsc_get_compatibility_version_in)); | |
141 | wr_offset = emit_version_query_msg(xe, &bo->vmap, wr_offset); | |
142 | ||
143 | err = xe_gsc_pkt_submit_kernel(gsc, ggtt_offset, wr_offset, | |
144 | ggtt_offset + GSC_VER_PKT_SZ, | |
145 | GSC_VER_PKT_SZ); | |
146 | if (err) { | |
147 | xe_gt_err(gt, | |
148 | "failed to submit GSC request for compatibility version: %d\n", | |
149 | err); | |
150 | goto out_bo; | |
151 | } | |
152 | ||
153 | err = xe_gsc_read_out_header(xe, &bo->vmap, GSC_VER_PKT_SZ, | |
154 | sizeof(struct gsc_get_compatibility_version_out), | |
155 | &rd_offset); | |
156 | if (err) { | |
157 | xe_gt_err(gt, "HuC: invalid GSC reply for version query (err=%d)\n", err); | |
158 | return err; | |
159 | } | |
160 | ||
161 | compat->major = version_query_rd(xe, &bo->vmap, rd_offset, compat_major); | |
162 | compat->minor = version_query_rd(xe, &bo->vmap, rd_offset, compat_minor); | |
163 | ||
164 | xe_gt_info(gt, "found GSC cv%u.%u\n", compat->major, compat->minor); | |
165 | ||
166 | out_bo: | |
167 | xe_bo_unpin_map_no_vm(bo); | |
168 | return err; | |
169 | } | |
170 | ||
dd0e89e5 DCS |
171 | static int gsc_fw_is_loaded(struct xe_gt *gt) |
172 | { | |
173 | return xe_mmio_read32(gt, HECI_FWSTS1(MTL_GSC_HECI1_BASE)) & | |
174 | HECI1_FWSTS1_INIT_COMPLETE; | |
175 | } | |
176 | ||
/* Poll the HECI1 FW status register until the GSC reports init complete. */
static int gsc_fw_wait(struct xe_gt *gt)
{
	/*
	 * GSC load can take up to 250ms from the moment the instruction is
	 * executed by the GSCCS. To account for possible submission delays or
	 * other issues, we use a 500ms timeout in the wait here.
	 */
	return xe_mmio_wait32(gt, HECI_FWSTS1(MTL_GSC_HECI1_BASE),
			      HECI1_FWSTS1_INIT_COMPLETE,
			      HECI1_FWSTS1_INIT_COMPLETE,
			      500 * USEC_PER_MSEC, NULL, false);
}
189 | ||
190 | static int gsc_upload(struct xe_gsc *gsc) | |
191 | { | |
192 | struct xe_gt *gt = gsc_to_gt(gsc); | |
193 | struct xe_device *xe = gt_to_xe(gt); | |
194 | int err; | |
195 | ||
196 | /* we should only be here if the init step were successful */ | |
197 | xe_assert(xe, xe_uc_fw_is_loadable(&gsc->fw) && gsc->q); | |
198 | ||
199 | if (gsc_fw_is_loaded(gt)) { | |
200 | xe_gt_err(gt, "GSC already loaded at upload time\n"); | |
201 | return -EEXIST; | |
202 | } | |
203 | ||
204 | err = memcpy_fw(gsc); | |
205 | if (err) { | |
206 | xe_gt_err(gt, "Failed to memcpy GSC FW\n"); | |
207 | return err; | |
208 | } | |
209 | ||
f63182b4 DCS |
210 | /* |
211 | * GSC is only killed by an FLR, so we need to trigger one on unload to | |
212 | * make sure we stop it. This is because we assign a chunk of memory to | |
213 | * the GSC as part of the FW load, so we need to make sure it stops | |
214 | * using it when we release it to the system on driver unload. Note that | |
215 | * this is not a problem of the unload per-se, because the GSC will not | |
216 | * touch that memory unless there are requests for it coming from the | |
217 | * driver; therefore, no accesses will happen while Xe is not loaded, | |
218 | * but if we re-load the driver then the GSC might wake up and try to | |
219 | * access that old memory location again. | |
220 | * Given that an FLR is a very disruptive action (see the FLR function | |
221 | * for details), we want to do it as the last action before releasing | |
222 | * the access to the MMIO bar, which means we need to do it as part of | |
223 | * mmio cleanup. | |
224 | */ | |
225 | xe->needs_flr_on_fini = true; | |
226 | ||
dd0e89e5 DCS |
227 | err = emit_gsc_upload(gsc); |
228 | if (err) { | |
229 | xe_gt_err(gt, "Failed to emit GSC FW upload (%pe)\n", ERR_PTR(err)); | |
230 | return err; | |
231 | } | |
232 | ||
233 | err = gsc_fw_wait(gt); | |
234 | if (err) { | |
235 | xe_gt_err(gt, "Failed to wait for GSC load (%pe)\n", ERR_PTR(err)); | |
236 | return err; | |
237 | } | |
238 | ||
0881cbe0 DCS |
239 | err = query_compatibility_version(gsc); |
240 | if (err) | |
241 | return err; | |
242 | ||
243 | err = xe_uc_fw_check_version_requirements(&gsc->fw); | |
244 | if (err) | |
245 | return err; | |
246 | ||
997a55ca DCS |
247 | return 0; |
248 | } | |
249 | ||
250 | static int gsc_upload_and_init(struct xe_gsc *gsc) | |
251 | { | |
252 | struct xe_gt *gt = gsc_to_gt(gsc); | |
253 | int ret; | |
254 | ||
255 | ret = gsc_upload(gsc); | |
256 | if (ret) | |
257 | return ret; | |
258 | ||
259 | xe_uc_fw_change_status(&gsc->fw, XE_UC_FIRMWARE_TRANSFERRED); | |
dd0e89e5 DCS |
260 | xe_gt_dbg(gt, "GSC FW async load completed\n"); |
261 | ||
997a55ca DCS |
262 | /* HuC auth failure is not fatal */ |
263 | if (xe_huc_is_authenticated(>->uc.huc, XE_HUC_AUTH_VIA_GUC)) | |
264 | xe_huc_auth(>->uc.huc, XE_HUC_AUTH_VIA_GSC); | |
265 | ||
266 | ret = xe_gsc_proxy_start(gsc); | |
267 | if (ret) | |
268 | return ret; | |
269 | ||
270 | xe_gt_dbg(gt, "GSC proxy init completed\n"); | |
271 | ||
dd0e89e5 DCS |
272 | return 0; |
273 | } | |
274 | ||
/*
 * Worker that executes the deferred GSC actions (FW load and/or proxy
 * request handling) accumulated in gsc->work_actions.
 */
static void gsc_work(struct work_struct *work)
{
	struct xe_gsc *gsc = container_of(work, typeof(*gsc), work);
	struct xe_gt *gt = gsc_to_gt(gsc);
	struct xe_device *xe = gt_to_xe(gt);
	u32 actions;
	int ret;

	/* snapshot and clear the pending actions under the lock */
	spin_lock_irq(&gsc->lock);
	actions = gsc->work_actions;
	gsc->work_actions = 0;
	spin_unlock_irq(&gsc->lock);

	/* keep the device awake and GSC forcewake held for the whole work */
	xe_device_mem_access_get(xe);
	xe_force_wake_get(gt_to_fw(gt), XE_FW_GSC);

	if (actions & GSC_ACTION_FW_LOAD) {
		ret = gsc_upload_and_init(gsc);
		/* -EEXIST means the FW was already loaded, which is not a failure */
		if (ret && ret != -EEXIST)
			xe_uc_fw_change_status(&gsc->fw, XE_UC_FIRMWARE_LOAD_FAIL);
		else
			xe_uc_fw_change_status(&gsc->fw, XE_UC_FIRMWARE_RUNNING);
	}

	if (actions & GSC_ACTION_SW_PROXY)
		xe_gsc_proxy_request_handler(gsc);

	xe_force_wake_put(gt_to_fw(gt), XE_FW_GSC);
	xe_device_mem_access_put(xe);
}
305 | ||
0d1caff4 DCS |
306 | int xe_gsc_init(struct xe_gsc *gsc) |
307 | { | |
308 | struct xe_gt *gt = gsc_to_gt(gsc); | |
309 | struct xe_tile *tile = gt_to_tile(gt); | |
310 | int ret; | |
311 | ||
312 | gsc->fw.type = XE_UC_FW_TYPE_GSC; | |
dd0e89e5 | 313 | INIT_WORK(&gsc->work, gsc_work); |
eb08104f | 314 | spin_lock_init(&gsc->lock); |
0d1caff4 DCS |
315 | |
316 | /* The GSC uC is only available on the media GT */ | |
317 | if (tile->media_gt && (gt != tile->media_gt)) { | |
318 | xe_uc_fw_change_status(&gsc->fw, XE_UC_FIRMWARE_NOT_SUPPORTED); | |
319 | return 0; | |
320 | } | |
321 | ||
322 | /* | |
323 | * Some platforms can have GuC but not GSC. That would cause | |
324 | * xe_uc_fw_init(gsc) to return a "not supported" failure code and abort | |
325 | * all firmware loading. So check for GSC being enabled before | |
326 | * propagating the failure back up. That way the higher level will keep | |
327 | * going and load GuC as appropriate. | |
328 | */ | |
329 | ret = xe_uc_fw_init(&gsc->fw); | |
330 | if (!xe_uc_fw_is_enabled(&gsc->fw)) | |
331 | return 0; | |
332 | else if (ret) | |
333 | goto out; | |
334 | ||
997a55ca DCS |
335 | ret = xe_gsc_proxy_init(gsc); |
336 | if (ret && ret != -ENODEV) | |
337 | goto out; | |
338 | ||
0d1caff4 DCS |
339 | return 0; |
340 | ||
341 | out: | |
342 | xe_gt_err(gt, "GSC init failed with %d", ret); | |
343 | return ret; | |
344 | } | |
345 | ||
/*
 * drmm release action for the GSC submission resources. The workqueue is
 * destroyed first so no queued gsc_work can still be running when the exec
 * queue and the private bo are released.
 */
static void free_resources(struct drm_device *drm, void *arg)
{
	struct xe_gsc *gsc = arg;

	if (gsc->wq) {
		destroy_workqueue(gsc->wq);
		gsc->wq = NULL;
	}

	if (gsc->q) {
		xe_exec_queue_put(gsc->q);
		gsc->q = NULL;
	}

	if (gsc->private) {
		xe_bo_unpin_map_no_vm(gsc->private);
		gsc->private = NULL;
	}
}
365 | ||
/*
 * Allocate the resources needed for GSC FW submission: a 4MB private area
 * in stolen memory (mapped in GGTT) for the FW image, an exec queue on the
 * GSC engine and an ordered workqueue for the async load. Their cleanup is
 * registered as a drmm action (see free_resources).
 */
int xe_gsc_init_post_hwconfig(struct xe_gsc *gsc)
{
	struct xe_gt *gt = gsc_to_gt(gsc);
	struct xe_tile *tile = gt_to_tile(gt);
	struct xe_device *xe = gt_to_xe(gt);
	struct xe_hw_engine *hwe = xe_gt_hw_engine(gt, XE_ENGINE_CLASS_OTHER, 0, true);
	struct xe_exec_queue *q;
	struct workqueue_struct *wq;
	struct xe_bo *bo;
	int err;

	/* nothing to set up if there is no GSC FW to load */
	if (!xe_uc_fw_is_available(&gsc->fw))
		return 0;

	if (!hwe)
		return -ENODEV;

	bo = xe_bo_create_pin_map(xe, tile, NULL, SZ_4M,
				  ttm_bo_type_kernel,
				  XE_BO_CREATE_STOLEN_BIT |
				  XE_BO_CREATE_GGTT_BIT);
	if (IS_ERR(bo))
		return PTR_ERR(bo);

	q = xe_exec_queue_create(xe, NULL,
				 BIT(hwe->logical_instance), 1, hwe,
				 EXEC_QUEUE_FLAG_KERNEL |
				 EXEC_QUEUE_FLAG_PERMANENT, 0);
	if (IS_ERR(q)) {
		xe_gt_err(gt, "Failed to create queue for GSC submission\n");
		err = PTR_ERR(q);
		goto out_bo;
	}

	wq = alloc_ordered_workqueue("gsc-ordered-wq", 0);
	if (!wq) {
		err = -ENOMEM;
		goto out_q;
	}

	gsc->private = bo;
	gsc->q = q;
	gsc->wq = wq;

	/* on failure this frees wq/q/bo, so no goto-cleanup needed past here */
	err = drmm_add_action_or_reset(&xe->drm, free_resources, gsc);
	if (err)
		return err;

	xe_uc_fw_change_status(&gsc->fw, XE_UC_FIRMWARE_LOADABLE);

	return 0;

out_q:
	xe_exec_queue_put(q);
out_bo:
	xe_bo_unpin_map_no_vm(bo);
	return err;
}
424 | ||
/*
 * Kick off the asynchronous GSC FW load by queuing GSC_ACTION_FW_LOAD on
 * the GSC ordered workqueue. No-op if the FW is not loadable, submission
 * resources are missing, or the FW is already loaded (it survives GT reset
 * and D3Hot).
 */
void xe_gsc_load_start(struct xe_gsc *gsc)
{
	struct xe_gt *gt = gsc_to_gt(gsc);

	if (!xe_uc_fw_is_loadable(&gsc->fw) || !gsc->q)
		return;

	/* GSC FW survives GT reset and D3Hot */
	if (gsc_fw_is_loaded(gt)) {
		xe_uc_fw_change_status(&gsc->fw, XE_UC_FIRMWARE_TRANSFERRED);
		return;
	}

	spin_lock_irq(&gsc->lock);
	gsc->work_actions |= GSC_ACTION_FW_LOAD;
	spin_unlock_irq(&gsc->lock);

	queue_work(gsc->wq, &gsc->work);
}
444 | ||
445 | void xe_gsc_wait_for_worker_completion(struct xe_gsc *gsc) | |
446 | { | |
447 | if (xe_uc_fw_is_loadable(&gsc->fw) && gsc->wq) | |
448 | flush_work(&gsc->work); | |
449 | } | |
aae84bf1 | 450 | |
/**
 * xe_gsc_remove() - Clean up the GSC structures before driver removal
 * @gsc: the GSC uC
 *
 * Only the proxy needs explicit teardown here; the submission resources are
 * released via the drmm action registered in xe_gsc_init_post_hwconfig().
 */
void xe_gsc_remove(struct xe_gsc *gsc)
{
	xe_gsc_proxy_remove(gsc);
}
459 | ||
aae84bf1 DCS |
460 | /* |
461 | * wa_14015076503: if the GSC FW is loaded, we need to alert it before doing a | |
462 | * GSC engine reset by writing a notification bit in the GS1 register and then | |
463 | * triggering an interrupt to GSC; from the interrupt it will take up to 200ms | |
464 | * for the FW to get prepare for the reset, so we need to wait for that amount | |
465 | * of time. | |
466 | * After the reset is complete we need to then clear the GS1 register. | |
467 | */ | |
468 | void xe_gsc_wa_14015076503(struct xe_gt *gt, bool prep) | |
469 | { | |
470 | u32 gs1_set = prep ? HECI_H_GS1_ER_PREP : 0; | |
471 | u32 gs1_clr = prep ? 0 : HECI_H_GS1_ER_PREP; | |
472 | ||
473 | /* WA only applies if the GSC is loaded */ | |
474 | if (!XE_WA(gt, 14015076503) || !gsc_fw_is_loaded(gt)) | |
475 | return; | |
476 | ||
477 | xe_mmio_rmw32(gt, HECI_H_GS1(MTL_GSC_HECI2_BASE), gs1_clr, gs1_set); | |
478 | ||
479 | if (prep) { | |
480 | /* make sure the reset bit is clear when writing the CSR reg */ | |
481 | xe_mmio_rmw32(gt, HECI_H_CSR(MTL_GSC_HECI2_BASE), | |
482 | HECI_H_CSR_RST, HECI_H_CSR_IG); | |
483 | msleep(200); | |
484 | } | |
485 | } |