// SPDX-License-Identifier: MIT
/*
 * Copyright © 2021 Intel Corporation
 */

#include "xe_lrc.h"

#include <generated/xe_wa_oob.h>

#include <linux/ascii85.h>

#include "instructions/xe_mi_commands.h"
#include "instructions/xe_gfxpipe_commands.h"
#include "instructions/xe_gfx_state_commands.h"
#include "regs/xe_engine_regs.h"
#include "regs/xe_lrc_layout.h"
#include "xe_bb.h"
#include "xe_bo.h"
#include "xe_device.h"
#include "xe_drm_client.h"
#include "xe_exec_queue_types.h"
#include "xe_gt.h"
#include "xe_gt_printk.h"
#include "xe_hw_fence.h"
#include "xe_map.h"
#include "xe_memirq.h"
#include "xe_mmio.h"
#include "xe_sriov.h"
#include "xe_trace_lrc.h"
#include "xe_vm.h"
#include "xe_wa.h"

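/*
 * Flag and field definitions for the legacy 64-bit context descriptor;
 * xe_lrc_descriptor() below ORs these bits with the LRC's GGTT address.
 */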
#define LRC_VALID				BIT_ULL(0)
#define LRC_PRIVILEGE				BIT_ULL(8)
#define LRC_ADDRESSING_MODE			GENMASK_ULL(4, 3)
#define LRC_LEGACY_64B_CONTEXT			3

#define LRC_ENGINE_CLASS			GENMASK_ULL(63, 61)
#define LRC_ENGINE_INSTANCE			GENMASK_ULL(53, 48)

#define LRC_PPHWSP_SIZE				SZ_4K
#define LRC_INDIRECT_RING_STATE_SIZE		SZ_4K
#define LRC_WA_BB_SIZE				SZ_4K

static struct xe_device *
lrc_to_xe(struct xe_lrc *lrc)
{
	return gt_to_xe(lrc->fence_ctx.gt);
}

size_t xe_gt_lrc_size(struct xe_gt *gt, enum xe_engine_class class)
{
	struct xe_device *xe = gt_to_xe(gt);
	size_t size;

	/* Per-process HW status page (PPHWSP) */
	size = LRC_PPHWSP_SIZE;

	/* Engine context image */
	switch (class) {
	case XE_ENGINE_CLASS_RENDER:
		if (GRAPHICS_VER(xe) >= 20)
			size += 3 * SZ_4K;
		else
			size += 13 * SZ_4K;
		break;
	case XE_ENGINE_CLASS_COMPUTE:
		if (GRAPHICS_VER(xe) >= 20)
			size += 2 * SZ_4K;
		else
			size += 13 * SZ_4K;
		break;
	default:
		WARN(1, "Unknown engine class: %d", class);
		fallthrough;
	case XE_ENGINE_CLASS_COPY:
	case XE_ENGINE_CLASS_VIDEO_DECODE:
	case XE_ENGINE_CLASS_VIDEO_ENHANCE:
	case XE_ENGINE_CLASS_OTHER:
		size += 1 * SZ_4K;
	}

	/* Add indirect ring state page */
	if (xe_gt_has_indirect_ring_state(gt))
		size += LRC_INDIRECT_RING_STATE_SIZE;

	return size;
}

/*
 * The per-platform tables are u8-encoded in @data. Decode @data and set the
 * addresses' offset and commands in @regs. The following encoding is used
 * for each byte. There are 2 steps: decoding commands and decoding addresses.
 *
 * Commands:
 * [7]: create NOPs - number of NOPs are set in lower bits
 * [6]: When creating MI_LOAD_REGISTER_IMM command, allow to set
 *      MI_LRI_FORCE_POSTED
 * [5:0]: Number of NOPs or registers to set values to in case of
 *        MI_LOAD_REGISTER_IMM
 *
 * Addresses: these are decoded after a MI_LOAD_REGISTER_IMM command by "count"
 * number of registers. They are set by using the REG/REG16 macros: the former
 * is used for offsets smaller than 0x200 while the latter is for values bigger
 * than that. Those macros already set all the bits documented below correctly:
 *
 * [7]: When a register offset needs more than 6 bits, use additional bytes, to
 *      follow, for the lower bits
 * [6:0]: Register offset, without considering the engine base.
 *
 * This function only tweaks the commands and register offsets. Values are not
 * filled out.
 */
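/*
 * Worked example of the encoding above: LRI(13, POSTED) encodes to the byte
 * 0x4d (count 13 in [5:0], the POSTED flag in [6]), REG(0x034) to the single
 * byte 0x0d (0x034 >> 2), and REG16(0x244) to the byte pair 0x81, 0x11
 * (bit 7 of the first byte flags the continuation byte that follows).
 */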
static void set_offsets(u32 *regs,
			const u8 *data,
			const struct xe_hw_engine *hwe)
#define NOP(x) (BIT(7) | (x))
#define LRI(count, flags) ((flags) << 6 | (count) | \
			   BUILD_BUG_ON_ZERO(count >= BIT(6)))
#define POSTED BIT(0)
#define REG(x) (((x) >> 2) | BUILD_BUG_ON_ZERO(x >= 0x200))
#define REG16(x) \
	(((x) >> 9) | BIT(7) | BUILD_BUG_ON_ZERO(x >= 0x10000)), \
	(((x) >> 2) & 0x7f)
{
	const u32 base = hwe->mmio_base;

	while (*data) {
		u8 count, flags;

		if (*data & BIT(7)) { /* skip */
			count = *data++ & ~BIT(7);
			regs += count;
			continue;
		}

		count = *data & 0x3f;
		flags = *data >> 6;
		data++;

		*regs = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(count);
		if (flags & POSTED)
			*regs |= MI_LRI_FORCE_POSTED;
		*regs |= MI_LRI_LRM_CS_MMIO;
		regs++;

		xe_gt_assert(hwe->gt, count);
		do {
			u32 offset = 0;
			u8 v;

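			/*
			 * Register offsets wider than 7 bits arrive as a
			 * chain of bytes, most-significant group first;
			 * bit 7 of each byte flags that another byte
			 * follows.
			 */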
			do {
				v = *data++;
				offset <<= 7;
				offset |= v & ~BIT(7);
			} while (v & BIT(7));

			regs[0] = base + (offset << 2);
			regs += 2;
		} while (--count);
	}

	*regs = MI_BATCH_BUFFER_END | BIT(0);
}

static const u8 gen12_xcs_offsets[] = {
	NOP(1),
	LRI(13, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),
	REG(0x180),
	REG16(0x2b4),

	NOP(5),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	0
};

static const u8 dg2_xcs_offsets[] = {
	NOP(1),
	LRI(15, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),
	REG(0x180),
	REG16(0x2b4),
	REG(0x120),
	REG(0x124),

	NOP(1),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	0
};

static const u8 gen12_rcs_offsets[] = {
	NOP(1),
	LRI(13, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),
	REG(0x180),
	REG16(0x2b4),

	NOP(5),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	LRI(3, POSTED),
	REG(0x1b0),
	REG16(0x5a8),
	REG16(0x5ac),

	NOP(6),
	LRI(1, 0),
	REG(0x0c8),
	NOP(3 + 9 + 1),

	LRI(51, POSTED),
	REG16(0x588),
	REG16(0x588),
	REG16(0x588),
	REG16(0x588),
	REG16(0x588),
	REG16(0x588),
	REG(0x028),
	REG(0x09c),
	REG(0x0c0),
	REG(0x178),
	REG(0x17c),
	REG16(0x358),
	REG(0x170),
	REG(0x150),
	REG(0x154),
	REG(0x158),
	REG16(0x41c),
	REG16(0x600),
	REG16(0x604),
	REG16(0x608),
	REG16(0x60c),
	REG16(0x610),
	REG16(0x614),
	REG16(0x618),
	REG16(0x61c),
	REG16(0x620),
	REG16(0x624),
	REG16(0x628),
	REG16(0x62c),
	REG16(0x630),
	REG16(0x634),
	REG16(0x638),
	REG16(0x63c),
	REG16(0x640),
	REG16(0x644),
	REG16(0x648),
	REG16(0x64c),
	REG16(0x650),
	REG16(0x654),
	REG16(0x658),
	REG16(0x65c),
	REG16(0x660),
	REG16(0x664),
	REG16(0x668),
	REG16(0x66c),
	REG16(0x670),
	REG16(0x674),
	REG16(0x678),
	REG16(0x67c),
	REG(0x068),
	REG(0x084),
	NOP(1),

	0
};

static const u8 xehp_rcs_offsets[] = {
	NOP(1),
	LRI(13, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),
	REG(0x180),
	REG16(0x2b4),

	NOP(5),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	LRI(3, POSTED),
	REG(0x1b0),
	REG16(0x5a8),
	REG16(0x5ac),

	NOP(6),
	LRI(1, 0),
	REG(0x0c8),

	0
};

static const u8 dg2_rcs_offsets[] = {
	NOP(1),
	LRI(15, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),
	REG(0x180),
	REG16(0x2b4),
	REG(0x120),
	REG(0x124),

	NOP(1),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	LRI(3, POSTED),
	REG(0x1b0),
	REG16(0x5a8),
	REG16(0x5ac),

	NOP(6),
	LRI(1, 0),
	REG(0x0c8),

	0
};

static const u8 mtl_rcs_offsets[] = {
	NOP(1),
	LRI(15, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),
	REG(0x180),
	REG16(0x2b4),
	REG(0x120),
	REG(0x124),

	NOP(1),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	NOP(2),
	LRI(2, POSTED),
	REG16(0x5a8),
	REG16(0x5ac),

	NOP(6),
	LRI(1, 0),
	REG(0x0c8),

	0
};

#define XE2_CTX_COMMON \
	NOP(1),			/* [0x00] */ \
	LRI(15, POSTED),	/* [0x01] */ \
	REG16(0x244),		/* [0x02] CTXT_SR_CTL */ \
	REG(0x034),		/* [0x04] RING_BUFFER_HEAD */ \
	REG(0x030),		/* [0x06] RING_BUFFER_TAIL */ \
	REG(0x038),		/* [0x08] RING_BUFFER_START */ \
	REG(0x03c),		/* [0x0a] RING_BUFFER_CONTROL */ \
	REG(0x168),		/* [0x0c] BB_ADDR_UDW */ \
	REG(0x140),		/* [0x0e] BB_ADDR */ \
	REG(0x110),		/* [0x10] BB_STATE */ \
	REG(0x1c0),		/* [0x12] BB_PER_CTX_PTR */ \
	REG(0x1c4),		/* [0x14] RCS_INDIRECT_CTX */ \
	REG(0x1c8),		/* [0x16] RCS_INDIRECT_CTX_OFFSET */ \
	REG(0x180),		/* [0x18] CCID */ \
	REG16(0x2b4),		/* [0x1a] SEMAPHORE_TOKEN */ \
	REG(0x120),		/* [0x1c] PRT_BB_STATE */ \
	REG(0x124),		/* [0x1e] PRT_BB_STATE_UDW */ \
	\
	NOP(1),			/* [0x20] */ \
	LRI(9, POSTED),		/* [0x21] */ \
	REG16(0x3a8),		/* [0x22] CTX_TIMESTAMP */ \
	REG16(0x3ac),		/* [0x24] CTX_TIMESTAMP_UDW */ \
	REG(0x108),		/* [0x26] INDIRECT_RING_STATE */ \
	REG16(0x284),		/* [0x28] dummy reg */ \
	REG16(0x280),		/* [0x2a] CS_ACC_CTR_THOLD */ \
	REG16(0x27c),		/* [0x2c] CS_CTX_SYS_PASID */ \
	REG16(0x278),		/* [0x2e] CS_CTX_ASID */ \
	REG16(0x274),		/* [0x30] PTBP_UDW */ \
	REG16(0x270)		/* [0x32] PTBP_LDW */

static const u8 xe2_rcs_offsets[] = {
	XE2_CTX_COMMON,

	NOP(2),			/* [0x34] */
	LRI(2, POSTED),		/* [0x36] */
	REG16(0x5a8),		/* [0x37] CONTEXT_SCHEDULING_ATTRIBUTES */
	REG16(0x5ac),		/* [0x39] PREEMPTION_STATUS */

	NOP(6),			/* [0x41] */
	LRI(1, 0),		/* [0x47] */
	REG(0x0c8),		/* [0x48] R_PWR_CLK_STATE */

	0
};

static const u8 xe2_bcs_offsets[] = {
	XE2_CTX_COMMON,

	NOP(4 + 8 + 1),		/* [0x34] */
	LRI(2, POSTED),		/* [0x41] */
	REG16(0x200),		/* [0x42] BCS_SWCTRL */
	REG16(0x204),		/* [0x44] BLIT_CCTL */

	0
};

static const u8 xe2_xcs_offsets[] = {
	XE2_CTX_COMMON,

	0
};

static const u8 xe2_indirect_ring_state_offsets[] = {
	NOP(1),			/* [0x00] */
	LRI(5, POSTED),		/* [0x01] */
	REG(0x034),		/* [0x02] RING_BUFFER_HEAD */
	REG(0x030),		/* [0x04] RING_BUFFER_TAIL */
	REG(0x038),		/* [0x06] RING_BUFFER_START */
	REG(0x048),		/* [0x08] RING_BUFFER_START_UDW */
	REG(0x03c),		/* [0x0a] RING_BUFFER_CONTROL */

	NOP(5),			/* [0x0c] */
	LRI(9, POSTED),		/* [0x11] */
	REG(0x168),		/* [0x12] BB_ADDR_UDW */
	REG(0x140),		/* [0x14] BB_ADDR */
	REG(0x110),		/* [0x16] BB_STATE */
	REG16(0x588),		/* [0x18] BB_STACK_WRITE_PORT */
	REG16(0x588),		/* [0x20] BB_STACK_WRITE_PORT */
	REG16(0x588),		/* [0x22] BB_STACK_WRITE_PORT */
	REG16(0x588),		/* [0x24] BB_STACK_WRITE_PORT */
	REG16(0x588),		/* [0x26] BB_STACK_WRITE_PORT */
	REG16(0x588),		/* [0x28] BB_STACK_WRITE_PORT */

	NOP(12),		/* [0x00] */

	0
};

#undef REG16
#undef REG
#undef LRI
#undef NOP

static const u8 *reg_offsets(struct xe_device *xe, enum xe_engine_class class)
{
	if (class == XE_ENGINE_CLASS_RENDER) {
		if (GRAPHICS_VER(xe) >= 20)
			return xe2_rcs_offsets;
		else if (GRAPHICS_VERx100(xe) >= 1270)
			return mtl_rcs_offsets;
		else if (GRAPHICS_VERx100(xe) >= 1255)
			return dg2_rcs_offsets;
		else if (GRAPHICS_VERx100(xe) >= 1250)
			return xehp_rcs_offsets;
		else
			return gen12_rcs_offsets;
	} else if (class == XE_ENGINE_CLASS_COPY) {
		if (GRAPHICS_VER(xe) >= 20)
			return xe2_bcs_offsets;
		else
			return gen12_xcs_offsets;
	} else {
		if (GRAPHICS_VER(xe) >= 20)
			return xe2_xcs_offsets;
		else if (GRAPHICS_VERx100(xe) >= 1255)
			return dg2_xcs_offsets;
		else
			return gen12_xcs_offsets;
	}
}

static void set_context_control(u32 *regs, struct xe_hw_engine *hwe)
{
	regs[CTX_CONTEXT_CONTROL] = _MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH |
						       CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT);

	if (xe_gt_has_indirect_ring_state(hwe->gt))
		regs[CTX_CONTEXT_CONTROL] |=
			_MASKED_BIT_ENABLE(CTX_CTRL_INDIRECT_RING_STATE_ENABLE);

	/* TODO: Timestamp */
}

static void set_memory_based_intr(u32 *regs, struct xe_hw_engine *hwe)
{
	struct xe_memirq *memirq = &gt_to_tile(hwe->gt)->memirq;
	struct xe_device *xe = gt_to_xe(hwe->gt);
	u8 num_regs;

	if (!xe_device_uses_memirq(xe))
		return;

	regs[CTX_LRM_INT_MASK_ENABLE] = MI_LOAD_REGISTER_MEM |
					MI_LRI_LRM_CS_MMIO | MI_LRM_USE_GGTT;
	regs[CTX_INT_MASK_ENABLE_REG] = RING_IMR(0).addr;
	regs[CTX_INT_MASK_ENABLE_PTR] = xe_memirq_enable_ptr(memirq);

	num_regs = xe_device_has_msix(xe) ? 3 : 2;
	regs[CTX_LRI_INT_REPORT_PTR] = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(num_regs) |
				       MI_LRI_LRM_CS_MMIO | MI_LRI_FORCE_POSTED;
	regs[CTX_INT_STATUS_REPORT_REG] = RING_INT_STATUS_RPT_PTR(0).addr;
	regs[CTX_INT_STATUS_REPORT_PTR] = xe_memirq_status_ptr(memirq, hwe);
	regs[CTX_INT_SRC_REPORT_REG] = RING_INT_SRC_RPT_PTR(0).addr;
	regs[CTX_INT_SRC_REPORT_PTR] = xe_memirq_source_ptr(memirq, hwe);

	if (xe_device_has_msix(xe)) {
		regs[CTX_CS_INT_VEC_REG] = CS_INT_VEC(0).addr;
		/* CTX_CS_INT_VEC_DATA will be set in xe_lrc_init */
	}
}

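/*
 * Dword offset of the MI_MODE register/value pair within the LRC register
 * state; reset_stop_ring() below pokes the value dword at x + 1 with a
 * masked-bit write that clears STOP_RING.
 */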
static int lrc_ring_mi_mode(struct xe_hw_engine *hwe)
{
	struct xe_device *xe = gt_to_xe(hwe->gt);

	if (GRAPHICS_VERx100(xe) >= 1250)
		return 0x70;
	else
		return 0x60;
}

static void reset_stop_ring(u32 *regs, struct xe_hw_engine *hwe)
{
	int x;

	x = lrc_ring_mi_mode(hwe);
	regs[x + 1] &= ~STOP_RING;
	regs[x + 1] |= STOP_RING << 16;
}

static inline bool xe_lrc_has_indirect_ring_state(struct xe_lrc *lrc)
{
	return lrc->flags & XE_LRC_FLAG_INDIRECT_RING_STATE;
}

static inline u32 __xe_lrc_ring_offset(struct xe_lrc *lrc)
{
	return 0;
}

u32 xe_lrc_pphwsp_offset(struct xe_lrc *lrc)
{
	return lrc->ring.size;
}

/* Make the magic macros work */
#define __xe_lrc_pphwsp_offset xe_lrc_pphwsp_offset
#define __xe_lrc_regs_offset xe_lrc_regs_offset

#define LRC_SEQNO_PPHWSP_OFFSET 512
#define LRC_START_SEQNO_PPHWSP_OFFSET (LRC_SEQNO_PPHWSP_OFFSET + 8)
#define LRC_CTX_JOB_TIMESTAMP_OFFSET (LRC_START_SEQNO_PPHWSP_OFFSET + 8)
#define LRC_PARALLEL_PPHWSP_OFFSET 2048
#define LRC_ENGINE_ID_PPHWSP_OFFSET 2096

u32 xe_lrc_regs_offset(struct xe_lrc *lrc)
{
	return xe_lrc_pphwsp_offset(lrc) + LRC_PPHWSP_SIZE;
}

static size_t lrc_reg_size(struct xe_device *xe)
{
	if (GRAPHICS_VERx100(xe) >= 1250)
		return 96 * sizeof(u32);
	else
		return 80 * sizeof(u32);
}

size_t xe_lrc_skip_size(struct xe_device *xe)
{
	return LRC_PPHWSP_SIZE + lrc_reg_size(xe);
}

static inline u32 __xe_lrc_seqno_offset(struct xe_lrc *lrc)
{
	/* The seqno is stored in the driver-defined portion of PPHWSP */
	return xe_lrc_pphwsp_offset(lrc) + LRC_SEQNO_PPHWSP_OFFSET;
}

static inline u32 __xe_lrc_start_seqno_offset(struct xe_lrc *lrc)
{
	/* The start seqno is stored in the driver-defined portion of PPHWSP */
	return xe_lrc_pphwsp_offset(lrc) + LRC_START_SEQNO_PPHWSP_OFFSET;
}

static u32 __xe_lrc_ctx_job_timestamp_offset(struct xe_lrc *lrc)
{
	/* This is stored in the driver-defined portion of PPHWSP */
	return xe_lrc_pphwsp_offset(lrc) + LRC_CTX_JOB_TIMESTAMP_OFFSET;
}

static inline u32 __xe_lrc_parallel_offset(struct xe_lrc *lrc)
{
	/* The parallel is stored in the driver-defined portion of PPHWSP */
	return xe_lrc_pphwsp_offset(lrc) + LRC_PARALLEL_PPHWSP_OFFSET;
}

static inline u32 __xe_lrc_engine_id_offset(struct xe_lrc *lrc)
{
	return xe_lrc_pphwsp_offset(lrc) + LRC_ENGINE_ID_PPHWSP_OFFSET;
}

static u32 __xe_lrc_ctx_timestamp_offset(struct xe_lrc *lrc)
{
	return __xe_lrc_regs_offset(lrc) + CTX_TIMESTAMP * sizeof(u32);
}

static u32 __xe_lrc_ctx_timestamp_udw_offset(struct xe_lrc *lrc)
{
	return __xe_lrc_regs_offset(lrc) + CTX_TIMESTAMP_UDW * sizeof(u32);
}

static inline u32 __xe_lrc_indirect_ring_offset(struct xe_lrc *lrc)
{
	/* Indirect ring state page is at the very end of LRC */
	return lrc->size - LRC_INDIRECT_RING_STATE_SIZE;
}

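/*
 * For each element, generate an iosys_map accessor (__xe_lrc_<elem>_map())
 * and a GGTT address accessor (__xe_lrc_<elem>_ggtt_addr()) on top of the
 * matching __xe_lrc_<elem>_offset() helper above.
 */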
#define DECL_MAP_ADDR_HELPERS(elem) \
static inline struct iosys_map __xe_lrc_##elem##_map(struct xe_lrc *lrc) \
{ \
	struct iosys_map map = lrc->bo->vmap; \
\
	xe_assert(lrc_to_xe(lrc), !iosys_map_is_null(&map)); \
	iosys_map_incr(&map, __xe_lrc_##elem##_offset(lrc)); \
	return map; \
} \
static inline u32 __maybe_unused __xe_lrc_##elem##_ggtt_addr(struct xe_lrc *lrc) \
{ \
	return xe_bo_ggtt_addr(lrc->bo) + __xe_lrc_##elem##_offset(lrc); \
} \

DECL_MAP_ADDR_HELPERS(ring)
DECL_MAP_ADDR_HELPERS(pphwsp)
DECL_MAP_ADDR_HELPERS(seqno)
DECL_MAP_ADDR_HELPERS(regs)
DECL_MAP_ADDR_HELPERS(start_seqno)
DECL_MAP_ADDR_HELPERS(ctx_job_timestamp)
DECL_MAP_ADDR_HELPERS(ctx_timestamp)
DECL_MAP_ADDR_HELPERS(ctx_timestamp_udw)
DECL_MAP_ADDR_HELPERS(parallel)
DECL_MAP_ADDR_HELPERS(indirect_ring)
DECL_MAP_ADDR_HELPERS(engine_id)

#undef DECL_MAP_ADDR_HELPERS

/**
 * xe_lrc_ctx_timestamp_ggtt_addr() - Get ctx timestamp GGTT address
 * @lrc: Pointer to the lrc.
 *
 * Returns: ctx timestamp GGTT address
 */
u32 xe_lrc_ctx_timestamp_ggtt_addr(struct xe_lrc *lrc)
{
	return __xe_lrc_ctx_timestamp_ggtt_addr(lrc);
}

/**
 * xe_lrc_ctx_timestamp_udw_ggtt_addr() - Get ctx timestamp udw GGTT address
 * @lrc: Pointer to the lrc.
 *
 * Returns: ctx timestamp udw GGTT address
 */
u32 xe_lrc_ctx_timestamp_udw_ggtt_addr(struct xe_lrc *lrc)
{
	return __xe_lrc_ctx_timestamp_udw_ggtt_addr(lrc);
}

/**
 * xe_lrc_ctx_timestamp() - Read ctx timestamp value
 * @lrc: Pointer to the lrc.
 *
 * Returns: ctx timestamp value
 */
u64 xe_lrc_ctx_timestamp(struct xe_lrc *lrc)
{
	struct xe_device *xe = lrc_to_xe(lrc);
	struct iosys_map map;
	u32 ldw, udw = 0;

	map = __xe_lrc_ctx_timestamp_map(lrc);
	ldw = xe_map_read32(xe, &map);

	if (xe->info.has_64bit_timestamp) {
		map = __xe_lrc_ctx_timestamp_udw_map(lrc);
		udw = xe_map_read32(xe, &map);
	}

	return (u64)udw << 32 | ldw;
}

/**
 * xe_lrc_ctx_job_timestamp_ggtt_addr() - Get ctx job timestamp GGTT address
 * @lrc: Pointer to the lrc.
 *
 * Returns: ctx job timestamp GGTT address
 */
u32 xe_lrc_ctx_job_timestamp_ggtt_addr(struct xe_lrc *lrc)
{
	return __xe_lrc_ctx_job_timestamp_ggtt_addr(lrc);
}

/**
 * xe_lrc_ctx_job_timestamp() - Read ctx job timestamp value
 * @lrc: Pointer to the lrc.
 *
 * Returns: ctx job timestamp value
 */
u32 xe_lrc_ctx_job_timestamp(struct xe_lrc *lrc)
{
	struct xe_device *xe = lrc_to_xe(lrc);
	struct iosys_map map;

	map = __xe_lrc_ctx_job_timestamp_map(lrc);
	return xe_map_read32(xe, &map);
}

u32 xe_lrc_ggtt_addr(struct xe_lrc *lrc)
{
	return __xe_lrc_pphwsp_ggtt_addr(lrc);
}

u32 xe_lrc_indirect_ring_ggtt_addr(struct xe_lrc *lrc)
{
	if (!xe_lrc_has_indirect_ring_state(lrc))
		return 0;

	return __xe_lrc_indirect_ring_ggtt_addr(lrc);
}

static u32 xe_lrc_read_indirect_ctx_reg(struct xe_lrc *lrc, int reg_nr)
{
	struct xe_device *xe = lrc_to_xe(lrc);
	struct iosys_map map;

	map = __xe_lrc_indirect_ring_map(lrc);
	iosys_map_incr(&map, reg_nr * sizeof(u32));
	return xe_map_read32(xe, &map);
}

static void xe_lrc_write_indirect_ctx_reg(struct xe_lrc *lrc,
					  int reg_nr, u32 val)
{
	struct xe_device *xe = lrc_to_xe(lrc);
	struct iosys_map map;

	map = __xe_lrc_indirect_ring_map(lrc);
	iosys_map_incr(&map, reg_nr * sizeof(u32));
	xe_map_write32(xe, &map, val);
}

u32 xe_lrc_read_ctx_reg(struct xe_lrc *lrc, int reg_nr)
{
	struct xe_device *xe = lrc_to_xe(lrc);
	struct iosys_map map;

	map = __xe_lrc_regs_map(lrc);
	iosys_map_incr(&map, reg_nr * sizeof(u32));
	return xe_map_read32(xe, &map);
}

void xe_lrc_write_ctx_reg(struct xe_lrc *lrc, int reg_nr, u32 val)
{
	struct xe_device *xe = lrc_to_xe(lrc);
	struct iosys_map map;

	map = __xe_lrc_regs_map(lrc);
	iosys_map_incr(&map, reg_nr * sizeof(u32));
	xe_map_write32(xe, &map, val);
}

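/*
 * Build a zeroed LRC image in system memory with commands, register offsets,
 * and control bits filled in; used when the GT does not yet have a default
 * LRC for this engine class.
 */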
static void *empty_lrc_data(struct xe_hw_engine *hwe)
{
	struct xe_gt *gt = hwe->gt;
	void *data;
	u32 *regs;

	data = kzalloc(xe_gt_lrc_size(gt, hwe->class), GFP_KERNEL);
	if (!data)
		return NULL;

	/* 1st page: Per-Process HW status Page */
	regs = data + LRC_PPHWSP_SIZE;
	set_offsets(regs, reg_offsets(gt_to_xe(gt), hwe->class), hwe);
	set_context_control(regs, hwe);
	set_memory_based_intr(regs, hwe);
	reset_stop_ring(regs, hwe);
	if (xe_gt_has_indirect_ring_state(gt)) {
		regs = data + xe_gt_lrc_size(gt, hwe->class) -
		       LRC_INDIRECT_RING_STATE_SIZE;
		set_offsets(regs, xe2_indirect_ring_state_offsets, hwe);
	}

	return data;
}

static void xe_lrc_set_ppgtt(struct xe_lrc *lrc, struct xe_vm *vm)
{
	u64 desc = xe_vm_pdp4_descriptor(vm, gt_to_tile(lrc->gt));

	xe_lrc_write_ctx_reg(lrc, CTX_PDP0_UDW, upper_32_bits(desc));
	xe_lrc_write_ctx_reg(lrc, CTX_PDP0_LDW, lower_32_bits(desc));
}

static void xe_lrc_finish(struct xe_lrc *lrc)
{
	xe_hw_fence_ctx_finish(&lrc->fence_ctx);
	xe_bo_unpin_map_no_vm(lrc->bo);
}

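/* The utilization WA BB occupies the last LRC_WA_BB_SIZE bytes of the LRC BO */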
static size_t wa_bb_offset(struct xe_lrc *lrc)
{
	return lrc->bo->size - LRC_WA_BB_SIZE;
}

/*
 * xe_lrc_setup_utilization() - Setup wa bb to assist in calculating active
 * context run ticks.
 * @lrc: Pointer to the lrc.
 *
 * Context Timestamp (CTX_TIMESTAMP) in the LRC accumulates the run ticks of the
 * context, but only gets updated when the context switches out. In order to
 * check how long a context has been active before it switches out, two things
 * are required:
 *
 * (1) Determine if the context is running:
 * To do so, we program the WA BB to set an initial value for CTX_TIMESTAMP in
 * the LRC. The value chosen is 1 since 0 is the initial value when the LRC is
 * initialized. During a query, we just check for this value to determine if the
 * context is active. If the context switched out, it would overwrite this
 * location with the actual CTX_TIMESTAMP MMIO value. Note that WA BB runs as
 * the last part of context restore, so reusing this LRC location will not
 * clobber anything.
 *
 * (2) Calculate the time that the context has been active for:
 * The CTX_TIMESTAMP ticks only when the context is active. If a context is
 * active, we just use the CTX_TIMESTAMP MMIO as the new value of utilization.
 * While doing so, we need to read the CTX_TIMESTAMP MMIO for the specific
 * engine instance. Since we do not know which instance the context is running
 * on until it is scheduled, we also read the ENGINE_ID MMIO in the WA BB and
 * store it in the PPHWSP.
 */
#define CONTEXT_ACTIVE 1ULL
static int xe_lrc_setup_utilization(struct xe_lrc *lrc)
{
	const size_t max_size = LRC_WA_BB_SIZE;
	u32 *cmd, *buf = NULL;

	if (lrc->bo->vmap.is_iomem) {
		buf = kmalloc(max_size, GFP_KERNEL);
		if (!buf)
			return -ENOMEM;
		cmd = buf;
	} else {
		cmd = lrc->bo->vmap.vaddr + wa_bb_offset(lrc);
	}

	*cmd++ = MI_STORE_REGISTER_MEM | MI_SRM_USE_GGTT | MI_SRM_ADD_CS_OFFSET;
	*cmd++ = ENGINE_ID(0).addr;
	*cmd++ = __xe_lrc_engine_id_ggtt_addr(lrc);
	*cmd++ = 0;

	*cmd++ = MI_STORE_DATA_IMM | MI_SDI_GGTT | MI_SDI_NUM_DW(1);
	*cmd++ = __xe_lrc_ctx_timestamp_ggtt_addr(lrc);
	*cmd++ = 0;
	*cmd++ = lower_32_bits(CONTEXT_ACTIVE);

	if (lrc_to_xe(lrc)->info.has_64bit_timestamp) {
		*cmd++ = MI_STORE_DATA_IMM | MI_SDI_GGTT | MI_SDI_NUM_DW(1);
		*cmd++ = __xe_lrc_ctx_timestamp_udw_ggtt_addr(lrc);
		*cmd++ = 0;
		*cmd++ = upper_32_bits(CONTEXT_ACTIVE);
	}

	*cmd++ = MI_BATCH_BUFFER_END;

	if (buf) {
		xe_map_memcpy_to(gt_to_xe(lrc->gt), &lrc->bo->vmap,
				 wa_bb_offset(lrc), buf,
				 (cmd - buf) * sizeof(*cmd));
		kfree(buf);
	}

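	/*
	 * Bit 0 of BB_PER_CTX_PTR appears to serve as the valid/enable bit
	 * for the per-context batch buffer, hence the + 1 folded into the
	 * GGTT address below.
	 */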
	xe_lrc_write_ctx_reg(lrc, CTX_BB_PER_CTX_PTR, xe_bo_ggtt_addr(lrc->bo) +
			     wa_bb_offset(lrc) + 1);

	return 0;
}

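/*
 * Dword indices of the ASID and access-counter-threshold value slots in the
 * register state: the register address dword sits at [0x2e] / [0x2a] (see
 * XE2_CTX_COMMON above), so the + 1 selects the value dword that follows it.
 */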
#define PVC_CTX_ASID		(0x2e + 1)
#define PVC_CTX_ACC_CTR_THOLD	(0x2a + 1)

static int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe,
		       struct xe_vm *vm, u32 ring_size, u16 msix_vec,
		       u32 init_flags)
{
	struct xe_gt *gt = hwe->gt;
	struct xe_tile *tile = gt_to_tile(gt);
	struct xe_device *xe = gt_to_xe(gt);
	struct iosys_map map;
	void *init_data = NULL;
	u32 arb_enable;
	u32 lrc_size;
	u32 bo_flags;
	int err;

	kref_init(&lrc->refcount);
	lrc->gt = gt;
	lrc->flags = 0;
	lrc_size = ring_size + xe_gt_lrc_size(gt, hwe->class);
	if (xe_gt_has_indirect_ring_state(gt))
		lrc->flags |= XE_LRC_FLAG_INDIRECT_RING_STATE;

	bo_flags = XE_BO_FLAG_VRAM_IF_DGFX(tile) | XE_BO_FLAG_GGTT |
		   XE_BO_FLAG_GGTT_INVALIDATE;
	if (vm && vm->xef) /* userspace */
		bo_flags |= XE_BO_FLAG_PINNED_LATE_RESTORE;

	/*
	 * FIXME: Perma-pinning LRC as we don't yet support moving GGTT address
	 * via VM bind calls.
	 */
	lrc->bo = xe_bo_create_pin_map(xe, tile, NULL,
				       lrc_size + LRC_WA_BB_SIZE,
				       ttm_bo_type_kernel,
				       bo_flags);
	if (IS_ERR(lrc->bo))
		return PTR_ERR(lrc->bo);

	lrc->size = lrc_size;
	lrc->ring.size = ring_size;
	lrc->ring.tail = 0;

	xe_hw_fence_ctx_init(&lrc->fence_ctx, hwe->gt,
			     hwe->fence_irq, hwe->name);

	if (!gt->default_lrc[hwe->class]) {
		init_data = empty_lrc_data(hwe);
		if (!init_data) {
			err = -ENOMEM;
			goto err_lrc_finish;
		}
	}

	/*
	 * Init Per-Process HW status Page, LRC / context state to known
	 * values
	 */
	map = __xe_lrc_pphwsp_map(lrc);
	if (!init_data) {
		xe_map_memset(xe, &map, 0, 0, LRC_PPHWSP_SIZE);	/* PPHWSP */
		xe_map_memcpy_to(xe, &map, LRC_PPHWSP_SIZE,
				 gt->default_lrc[hwe->class] + LRC_PPHWSP_SIZE,
				 xe_gt_lrc_size(gt, hwe->class) - LRC_PPHWSP_SIZE);
	} else {
		xe_map_memcpy_to(xe, &map, 0, init_data,
				 xe_gt_lrc_size(gt, hwe->class));
		kfree(init_data);
	}

	if (vm) {
		xe_lrc_set_ppgtt(lrc, vm);

		if (vm->xef)
			xe_drm_client_add_bo(vm->xef->client, lrc->bo);
	}

	if (xe_device_has_msix(xe)) {
		xe_lrc_write_ctx_reg(lrc, CTX_INT_STATUS_REPORT_PTR,
				     xe_memirq_status_ptr(&tile->memirq, hwe));
		xe_lrc_write_ctx_reg(lrc, CTX_INT_SRC_REPORT_PTR,
				     xe_memirq_source_ptr(&tile->memirq, hwe));
		xe_lrc_write_ctx_reg(lrc, CTX_CS_INT_VEC_DATA, msix_vec << 16 | msix_vec);
	}

	if (xe_gt_has_indirect_ring_state(gt)) {
		xe_lrc_write_ctx_reg(lrc, CTX_INDIRECT_RING_STATE,
				     __xe_lrc_indirect_ring_ggtt_addr(lrc));

		xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_START,
					      __xe_lrc_ring_ggtt_addr(lrc));
		xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_START_UDW, 0);
		xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_HEAD, 0);
		xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_TAIL, lrc->ring.tail);
		xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_CTL,
					      RING_CTL_SIZE(lrc->ring.size) | RING_VALID);
	} else {
		xe_lrc_write_ctx_reg(lrc, CTX_RING_START, __xe_lrc_ring_ggtt_addr(lrc));
		xe_lrc_write_ctx_reg(lrc, CTX_RING_HEAD, 0);
		xe_lrc_write_ctx_reg(lrc, CTX_RING_TAIL, lrc->ring.tail);
		xe_lrc_write_ctx_reg(lrc, CTX_RING_CTL,
				     RING_CTL_SIZE(lrc->ring.size) | RING_VALID);
	}

	if (init_flags & XE_LRC_CREATE_RUNALONE)
		xe_lrc_write_ctx_reg(lrc, CTX_CONTEXT_CONTROL,
				     xe_lrc_read_ctx_reg(lrc, CTX_CONTEXT_CONTROL) |
				     _MASKED_BIT_ENABLE(CTX_CTRL_RUN_ALONE));

	if (init_flags & XE_LRC_CREATE_PXP)
		xe_lrc_write_ctx_reg(lrc, CTX_CONTEXT_CONTROL,
				     xe_lrc_read_ctx_reg(lrc, CTX_CONTEXT_CONTROL) |
				     _MASKED_BIT_ENABLE(CTX_CTRL_PXP_ENABLE));

	lrc->ctx_timestamp = 0;
	xe_lrc_write_ctx_reg(lrc, CTX_TIMESTAMP, 0);
	if (lrc_to_xe(lrc)->info.has_64bit_timestamp)
		xe_lrc_write_ctx_reg(lrc, CTX_TIMESTAMP_UDW, 0);

	if (xe->info.has_asid && vm)
		xe_lrc_write_ctx_reg(lrc, PVC_CTX_ASID, vm->usm.asid);

	lrc->desc = LRC_VALID;
	lrc->desc |= FIELD_PREP(LRC_ADDRESSING_MODE, LRC_LEGACY_64B_CONTEXT);
	/* TODO: Priority */

	/* While this appears to have something about privileged batches or
	 * some such, it really just means PPGTT mode.
	 */
	if (vm)
		lrc->desc |= LRC_PRIVILEGE;

	if (GRAPHICS_VERx100(xe) < 1250) {
		lrc->desc |= FIELD_PREP(LRC_ENGINE_INSTANCE, hwe->instance);
		lrc->desc |= FIELD_PREP(LRC_ENGINE_CLASS, hwe->class);
	}

	arb_enable = MI_ARB_ON_OFF | MI_ARB_ENABLE;
	xe_lrc_write_ring(lrc, &arb_enable, sizeof(arb_enable));

	map = __xe_lrc_seqno_map(lrc);
	xe_map_write32(lrc_to_xe(lrc), &map, lrc->fence_ctx.next_seqno - 1);

	map = __xe_lrc_start_seqno_map(lrc);
	xe_map_write32(lrc_to_xe(lrc), &map, lrc->fence_ctx.next_seqno - 1);

	err = xe_lrc_setup_utilization(lrc);
	if (err)
		goto err_lrc_finish;

	return 0;

err_lrc_finish:
	xe_lrc_finish(lrc);
	return err;
}

/**
 * xe_lrc_create - Create an LRC
 * @hwe: Hardware Engine
 * @vm: The VM (address space)
 * @ring_size: LRC ring size
 * @msix_vec: MSI-X interrupt vector (for platforms that support it)
 * @flags: LRC initialization flags
 *
 * Allocate and initialize the Logical Ring Context (LRC).
 *
 * Return pointer to created LRC upon success and an error pointer
 * upon failure.
 */
struct xe_lrc *xe_lrc_create(struct xe_hw_engine *hwe, struct xe_vm *vm,
			     u32 ring_size, u16 msix_vec, u32 flags)
{
	struct xe_lrc *lrc;
	int err;

	lrc = kzalloc(sizeof(*lrc), GFP_KERNEL);
	if (!lrc)
		return ERR_PTR(-ENOMEM);

	err = xe_lrc_init(lrc, hwe, vm, ring_size, msix_vec, flags);
	if (err) {
		kfree(lrc);
		return ERR_PTR(err);
	}

	return lrc;
}

/**
 * xe_lrc_destroy - Destroy the LRC
 * @ref: reference to LRC
 *
 * Called when ref == 0, release resources held by the Logical Ring Context
 * (LRC) and free the LRC memory.
 */
void xe_lrc_destroy(struct kref *ref)
{
	struct xe_lrc *lrc = container_of(ref, struct xe_lrc, refcount);

	xe_lrc_finish(lrc);
	kfree(lrc);
}

void xe_lrc_set_ring_tail(struct xe_lrc *lrc, u32 tail)
{
	if (xe_lrc_has_indirect_ring_state(lrc))
		xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_TAIL, tail);
	else
		xe_lrc_write_ctx_reg(lrc, CTX_RING_TAIL, tail);
}

u32 xe_lrc_ring_tail(struct xe_lrc *lrc)
{
	if (xe_lrc_has_indirect_ring_state(lrc))
		return xe_lrc_read_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_TAIL) & TAIL_ADDR;
	else
		return xe_lrc_read_ctx_reg(lrc, CTX_RING_TAIL) & TAIL_ADDR;
}

static u32 xe_lrc_ring_start(struct xe_lrc *lrc)
{
	if (xe_lrc_has_indirect_ring_state(lrc))
		return xe_lrc_read_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_START);
	else
		return xe_lrc_read_ctx_reg(lrc, CTX_RING_START);
}

void xe_lrc_set_ring_head(struct xe_lrc *lrc, u32 head)
{
	if (xe_lrc_has_indirect_ring_state(lrc))
		xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_HEAD, head);
	else
		xe_lrc_write_ctx_reg(lrc, CTX_RING_HEAD, head);
}

u32 xe_lrc_ring_head(struct xe_lrc *lrc)
{
	if (xe_lrc_has_indirect_ring_state(lrc))
		return xe_lrc_read_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_HEAD) & HEAD_ADDR;
	else
		return xe_lrc_read_ctx_reg(lrc, CTX_RING_HEAD) & HEAD_ADDR;
}

u32 xe_lrc_ring_space(struct xe_lrc *lrc)
{
	const u32 head = xe_lrc_ring_head(lrc);
	const u32 tail = lrc->ring.tail;
	const u32 size = lrc->ring.size;

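	/*
	 * size is a power of two, so the mask implements the wrap-around
	 * subtraction: space = size - ((tail - head) % size). E.g. with
	 * size = SZ_4K, head == tail yields 4096 free bytes, while a single
	 * outstanding byte yields 4095.
	 */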
	return ((head - tail - 1) & (size - 1)) + 1;
}

static void __xe_lrc_write_ring(struct xe_lrc *lrc, struct iosys_map ring,
				const void *data, size_t size)
{
	struct xe_device *xe = lrc_to_xe(lrc);

	iosys_map_incr(&ring, lrc->ring.tail);
	xe_map_memcpy_to(xe, &ring, 0, data, size);
	lrc->ring.tail = (lrc->ring.tail + size) & (lrc->ring.size - 1);
}

void xe_lrc_write_ring(struct xe_lrc *lrc, const void *data, size_t size)
{
	struct xe_device *xe = lrc_to_xe(lrc);
	struct iosys_map ring;
	u32 rhs;
	size_t aligned_size;

	xe_assert(xe, IS_ALIGNED(size, 4));
	aligned_size = ALIGN(size, 8);

	ring = __xe_lrc_ring_map(lrc);

	xe_assert(xe, lrc->ring.tail < lrc->ring.size);
	rhs = lrc->ring.size - lrc->ring.tail;
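	/* A payload crossing the end of the ring is split into two copies */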
	if (size > rhs) {
		__xe_lrc_write_ring(lrc, ring, data, rhs);
		__xe_lrc_write_ring(lrc, ring, data + rhs, size - rhs);
	} else {
		__xe_lrc_write_ring(lrc, ring, data, size);
	}

	if (aligned_size > size) {
		u32 noop = MI_NOOP;

		__xe_lrc_write_ring(lrc, ring, &noop, sizeof(noop));
	}
}

u64 xe_lrc_descriptor(struct xe_lrc *lrc)
{
	return lrc->desc | xe_lrc_ggtt_addr(lrc);
}

u32 xe_lrc_seqno_ggtt_addr(struct xe_lrc *lrc)
{
	return __xe_lrc_seqno_ggtt_addr(lrc);
}

/**
 * xe_lrc_alloc_seqno_fence() - Allocate an lrc seqno fence.
 *
 * Allocate but don't initialize an lrc seqno fence.
 *
 * Return: Pointer to the allocated fence or
 * negative error pointer on error.
 */
struct dma_fence *xe_lrc_alloc_seqno_fence(void)
{
	return xe_hw_fence_alloc();
}

/**
 * xe_lrc_free_seqno_fence() - Free an lrc seqno fence.
 * @fence: Pointer to the fence to free.
 *
 * Frees an lrc seqno fence that hasn't yet been
 * initialized.
 */
void xe_lrc_free_seqno_fence(struct dma_fence *fence)
{
	xe_hw_fence_free(fence);
}

/**
 * xe_lrc_init_seqno_fence() - Initialize an lrc seqno fence.
 * @lrc: Pointer to the lrc.
 * @fence: Pointer to the fence to initialize.
 *
 * Initializes a pre-allocated lrc seqno fence.
 * After initialization, the fence is subject to normal
 * dma-fence refcounting.
 */
void xe_lrc_init_seqno_fence(struct xe_lrc *lrc, struct dma_fence *fence)
{
	xe_hw_fence_init(fence, &lrc->fence_ctx, __xe_lrc_seqno_map(lrc));
}

s32 xe_lrc_seqno(struct xe_lrc *lrc)
{
	struct iosys_map map = __xe_lrc_seqno_map(lrc);

	return xe_map_read32(lrc_to_xe(lrc), &map);
}

s32 xe_lrc_start_seqno(struct xe_lrc *lrc)
{
	struct iosys_map map = __xe_lrc_start_seqno_map(lrc);

	return xe_map_read32(lrc_to_xe(lrc), &map);
}

u32 xe_lrc_start_seqno_ggtt_addr(struct xe_lrc *lrc)
{
	return __xe_lrc_start_seqno_ggtt_addr(lrc);
}

u32 xe_lrc_parallel_ggtt_addr(struct xe_lrc *lrc)
{
	return __xe_lrc_parallel_ggtt_addr(lrc);
}

struct iosys_map xe_lrc_parallel_map(struct xe_lrc *lrc)
{
	return __xe_lrc_parallel_map(lrc);
}

/**
 * xe_lrc_engine_id() - Read engine id value
 * @lrc: Pointer to the lrc.
 *
 * Returns: engine id value
 */
static u32 xe_lrc_engine_id(struct xe_lrc *lrc)
{
	struct xe_device *xe = lrc_to_xe(lrc);
	struct iosys_map map;

	map = __xe_lrc_engine_id_map(lrc);
	return xe_map_read32(xe, &map);
}

static int instr_dw(u32 cmd_header)
{
	/* GFXPIPE "SINGLE_DW" opcodes are a single dword */
	if ((cmd_header & (XE_INSTR_CMD_TYPE | GFXPIPE_PIPELINE)) ==
	    GFXPIPE_SINGLE_DW_CMD(0, 0))
		return 1;

	/* 3DSTATE_SO_DECL_LIST has a 9-bit dword length rather than 8 */
	if ((cmd_header & GFXPIPE_MATCH_MASK) == CMD_3DSTATE_SO_DECL_LIST)
		return REG_FIELD_GET(CMD_3DSTATE_SO_DECL_LIST_DW_LEN, cmd_header) + 2;

	/* Most instructions have the # of dwords (minus 2) in 7:0 */
	return REG_FIELD_GET(XE_INSTR_LEN_MASK, cmd_header) + 2;
}

static int dump_mi_command(struct drm_printer *p,
			   struct xe_gt *gt,
			   u32 *dw,
			   int remaining_dw)
{
	u32 inst_header = *dw;
	u32 numdw = instr_dw(inst_header);
	u32 opcode = REG_FIELD_GET(MI_OPCODE, inst_header);
	int num_noop;

	/* First check for commands that don't have/use a '# DW' field */
	switch (inst_header & MI_OPCODE) {
	case MI_NOOP:
		num_noop = 1;
		while (num_noop < remaining_dw &&
		       (*(++dw) & REG_GENMASK(31, 23)) == MI_NOOP)
			num_noop++;
		drm_printf(p, "[%#010x] MI_NOOP (%d dwords)\n", inst_header, num_noop);
		return num_noop;

	case MI_TOPOLOGY_FILTER:
		drm_printf(p, "[%#010x] MI_TOPOLOGY_FILTER\n", inst_header);
		return 1;

	case MI_BATCH_BUFFER_END:
		drm_printf(p, "[%#010x] MI_BATCH_BUFFER_END\n", inst_header);
		/* Return 'remaining_dw' to consume the rest of the LRC */
		return remaining_dw;
	}

	/*
	 * Any remaining commands include a # of dwords. We should make sure
	 * it doesn't exceed the remaining size of the LRC.
	 */
	if (xe_gt_WARN_ON(gt, numdw > remaining_dw))
		numdw = remaining_dw;

	switch (inst_header & MI_OPCODE) {
	case MI_LOAD_REGISTER_IMM:
		drm_printf(p, "[%#010x] MI_LOAD_REGISTER_IMM: %d regs\n",
			   inst_header, (numdw - 1) / 2);
		for (int i = 1; i < numdw; i += 2)
			drm_printf(p, " - %#6x = %#010x\n", dw[i], dw[i + 1]);
		return numdw;

	case MI_LOAD_REGISTER_MEM & MI_OPCODE:
		drm_printf(p, "[%#010x] MI_LOAD_REGISTER_MEM: %s%s\n",
			   inst_header,
			   dw[0] & MI_LRI_LRM_CS_MMIO ? "CS_MMIO " : "",
			   dw[0] & MI_LRM_USE_GGTT ? "USE_GGTT " : "");
		if (numdw == 4)
			drm_printf(p, " - %#6x = %#010llx\n",
				   dw[1], ((u64)(dw[3]) << 32 | (u64)(dw[2])));
		else
			drm_printf(p, " - %*ph (%s)\n",
				   (int)sizeof(u32) * (numdw - 1), dw + 1,
				   numdw < 4 ? "truncated" : "malformed");
		return numdw;

	case MI_FORCE_WAKEUP:
		drm_printf(p, "[%#010x] MI_FORCE_WAKEUP\n", inst_header);
		return numdw;

	default:
		drm_printf(p, "[%#010x] unknown MI opcode %#x, likely %d dwords\n",
			   inst_header, opcode, numdw);
		return numdw;
	}
}

static int dump_gfxpipe_command(struct drm_printer *p,
				struct xe_gt *gt,
				u32 *dw,
				int remaining_dw)
{
	u32 numdw = instr_dw(*dw);
	u32 pipeline = REG_FIELD_GET(GFXPIPE_PIPELINE, *dw);
	u32 opcode = REG_FIELD_GET(GFXPIPE_OPCODE, *dw);
	u32 subopcode = REG_FIELD_GET(GFXPIPE_SUBOPCODE, *dw);

	/*
	 * Make sure we haven't mis-parsed a number of dwords that exceeds the
	 * remaining size of the LRC.
	 */
	if (xe_gt_WARN_ON(gt, numdw > remaining_dw))
		numdw = remaining_dw;

	switch (*dw & GFXPIPE_MATCH_MASK) {
#define MATCH(cmd) \
	case cmd: \
		drm_printf(p, "[%#010x] " #cmd " (%d dwords)\n", *dw, numdw); \
		return numdw
#define MATCH3D(cmd) \
	case CMD_##cmd: \
		drm_printf(p, "[%#010x] " #cmd " (%d dwords)\n", *dw, numdw); \
		return numdw

	MATCH(STATE_BASE_ADDRESS);
	MATCH(STATE_SIP);
	MATCH(GPGPU_CSR_BASE_ADDRESS);
	MATCH(STATE_COMPUTE_MODE);
	MATCH3D(3DSTATE_BTD);
	MATCH(STATE_SYSTEM_MEM_FENCE_ADDRESS);
	MATCH(STATE_CONTEXT_DATA_BASE_ADDRESS);

	MATCH3D(3DSTATE_VF_STATISTICS);

	MATCH(PIPELINE_SELECT);

	MATCH3D(3DSTATE_DRAWING_RECTANGLE_FAST);
	MATCH3D(3DSTATE_CLEAR_PARAMS);
	MATCH3D(3DSTATE_DEPTH_BUFFER);
	MATCH3D(3DSTATE_STENCIL_BUFFER);
	MATCH3D(3DSTATE_HIER_DEPTH_BUFFER);
	MATCH3D(3DSTATE_VERTEX_BUFFERS);
	MATCH3D(3DSTATE_VERTEX_ELEMENTS);
	MATCH3D(3DSTATE_INDEX_BUFFER);
	MATCH3D(3DSTATE_VF);
	MATCH3D(3DSTATE_MULTISAMPLE);
	MATCH3D(3DSTATE_CC_STATE_POINTERS);
	MATCH3D(3DSTATE_SCISSOR_STATE_POINTERS);
	MATCH3D(3DSTATE_VS);
	MATCH3D(3DSTATE_GS);
	MATCH3D(3DSTATE_CLIP);
	MATCH3D(3DSTATE_SF);
	MATCH3D(3DSTATE_WM);
	MATCH3D(3DSTATE_CONSTANT_VS);
	MATCH3D(3DSTATE_CONSTANT_GS);
	MATCH3D(3DSTATE_CONSTANT_PS);
	MATCH3D(3DSTATE_SAMPLE_MASK);
	MATCH3D(3DSTATE_CONSTANT_HS);
	MATCH3D(3DSTATE_CONSTANT_DS);
	MATCH3D(3DSTATE_HS);
	MATCH3D(3DSTATE_TE);
	MATCH3D(3DSTATE_DS);
	MATCH3D(3DSTATE_STREAMOUT);
	MATCH3D(3DSTATE_SBE);
	MATCH3D(3DSTATE_PS);
	MATCH3D(3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP);
	MATCH3D(3DSTATE_CPS_POINTERS);
	MATCH3D(3DSTATE_VIEWPORT_STATE_POINTERS_CC);
	MATCH3D(3DSTATE_BLEND_STATE_POINTERS);
	MATCH3D(3DSTATE_BINDING_TABLE_POINTERS_VS);
	MATCH3D(3DSTATE_BINDING_TABLE_POINTERS_HS);
	MATCH3D(3DSTATE_BINDING_TABLE_POINTERS_DS);
	MATCH3D(3DSTATE_BINDING_TABLE_POINTERS_GS);
	MATCH3D(3DSTATE_BINDING_TABLE_POINTERS_PS);
	MATCH3D(3DSTATE_SAMPLER_STATE_POINTERS_VS);
	MATCH3D(3DSTATE_SAMPLER_STATE_POINTERS_HS);
	MATCH3D(3DSTATE_SAMPLER_STATE_POINTERS_DS);
	MATCH3D(3DSTATE_SAMPLER_STATE_POINTERS_GS);
	MATCH3D(3DSTATE_SAMPLER_STATE_POINTERS_PS);
	MATCH3D(3DSTATE_VF_INSTANCING);
	MATCH3D(3DSTATE_VF_SGVS);
	MATCH3D(3DSTATE_VF_TOPOLOGY);
	MATCH3D(3DSTATE_WM_CHROMAKEY);
	MATCH3D(3DSTATE_PS_BLEND);
	MATCH3D(3DSTATE_WM_DEPTH_STENCIL);
	MATCH3D(3DSTATE_PS_EXTRA);
	MATCH3D(3DSTATE_RASTER);
	MATCH3D(3DSTATE_SBE_SWIZ);
	MATCH3D(3DSTATE_WM_HZ_OP);
	MATCH3D(3DSTATE_VF_COMPONENT_PACKING);
	MATCH3D(3DSTATE_VF_SGVS_2);
	MATCH3D(3DSTATE_VFG);
	MATCH3D(3DSTATE_URB_ALLOC_VS);
	MATCH3D(3DSTATE_URB_ALLOC_HS);
	MATCH3D(3DSTATE_URB_ALLOC_DS);
	MATCH3D(3DSTATE_URB_ALLOC_GS);
	MATCH3D(3DSTATE_SO_BUFFER_INDEX_0);
	MATCH3D(3DSTATE_SO_BUFFER_INDEX_1);
	MATCH3D(3DSTATE_SO_BUFFER_INDEX_2);
	MATCH3D(3DSTATE_SO_BUFFER_INDEX_3);
	MATCH3D(3DSTATE_PRIMITIVE_REPLICATION);
	MATCH3D(3DSTATE_TBIMR_TILE_PASS_INFO);
	MATCH3D(3DSTATE_AMFS);
	MATCH3D(3DSTATE_DEPTH_BOUNDS);
	MATCH3D(3DSTATE_AMFS_TEXTURE_POINTERS);
	MATCH3D(3DSTATE_CONSTANT_TS_POINTER);
	MATCH3D(3DSTATE_MESH_CONTROL);
	MATCH3D(3DSTATE_MESH_DISTRIB);
	MATCH3D(3DSTATE_TASK_REDISTRIB);
	MATCH3D(3DSTATE_MESH_SHADER);
	MATCH3D(3DSTATE_MESH_SHADER_DATA);
	MATCH3D(3DSTATE_TASK_CONTROL);
	MATCH3D(3DSTATE_TASK_SHADER);
	MATCH3D(3DSTATE_TASK_SHADER_DATA);
	MATCH3D(3DSTATE_URB_ALLOC_MESH);
	MATCH3D(3DSTATE_URB_ALLOC_TASK);
	MATCH3D(3DSTATE_CLIP_MESH);
	MATCH3D(3DSTATE_SBE_MESH);
	MATCH3D(3DSTATE_CPSIZE_CONTROL_BUFFER);
	MATCH3D(3DSTATE_COARSE_PIXEL);

	MATCH3D(3DSTATE_DRAWING_RECTANGLE);
	MATCH3D(3DSTATE_CHROMA_KEY);
	MATCH3D(3DSTATE_POLY_STIPPLE_OFFSET);
	MATCH3D(3DSTATE_POLY_STIPPLE_PATTERN);
	MATCH3D(3DSTATE_LINE_STIPPLE);
	MATCH3D(3DSTATE_AA_LINE_PARAMETERS);
	MATCH3D(3DSTATE_MONOFILTER_SIZE);
	MATCH3D(3DSTATE_PUSH_CONSTANT_ALLOC_VS);
	MATCH3D(3DSTATE_PUSH_CONSTANT_ALLOC_HS);
	MATCH3D(3DSTATE_PUSH_CONSTANT_ALLOC_DS);
	MATCH3D(3DSTATE_PUSH_CONSTANT_ALLOC_GS);
	MATCH3D(3DSTATE_PUSH_CONSTANT_ALLOC_PS);
	MATCH3D(3DSTATE_SO_DECL_LIST);
	MATCH3D(3DSTATE_SO_BUFFER);
	MATCH3D(3DSTATE_BINDING_TABLE_POOL_ALLOC);
	MATCH3D(3DSTATE_SAMPLE_PATTERN);
	MATCH3D(3DSTATE_3D_MODE);
	MATCH3D(3DSTATE_SUBSLICE_HASH_TABLE);
	MATCH3D(3DSTATE_SLICE_TABLE_STATE_POINTERS);
	MATCH3D(3DSTATE_PTBR_TILE_PASS_INFO);

	default:
		drm_printf(p, "[%#010x] unknown GFXPIPE command (pipeline=%#x, opcode=%#x, subopcode=%#x), likely %d dwords\n",
			   *dw, pipeline, opcode, subopcode, numdw);
		return numdw;
	}
}

static int dump_gfx_state_command(struct drm_printer *p,
				  struct xe_gt *gt,
				  u32 *dw,
				  int remaining_dw)
{
	u32 numdw = instr_dw(*dw);
	u32 opcode = REG_FIELD_GET(GFX_STATE_OPCODE, *dw);

	/*
	 * Make sure we haven't mis-parsed a number of dwords that exceeds the
	 * remaining size of the LRC.
	 */
	if (xe_gt_WARN_ON(gt, numdw > remaining_dw))
		numdw = remaining_dw;

	switch (*dw & (XE_INSTR_GFX_STATE | GFX_STATE_OPCODE)) {
	MATCH(STATE_WRITE_INLINE);

	default:
		drm_printf(p, "[%#010x] unknown GFX_STATE command (opcode=%#x), likely %d dwords\n",
			   *dw, opcode, numdw);
		return numdw;
	}
}

void xe_lrc_dump_default(struct drm_printer *p,
			 struct xe_gt *gt,
			 enum xe_engine_class hwe_class)
{
	u32 *dw;
	int remaining_dw, num_dw;

	if (!gt->default_lrc[hwe_class]) {
		drm_printf(p, "No default LRC for class %d\n", hwe_class);
		return;
	}

	/*
	 * Skip the beginning of the LRC since it contains the per-process
	 * hardware status page.
	 */
	dw = gt->default_lrc[hwe_class] + LRC_PPHWSP_SIZE;
	remaining_dw = (xe_gt_lrc_size(gt, hwe_class) - LRC_PPHWSP_SIZE) / 4;

	while (remaining_dw > 0) {
		if ((*dw & XE_INSTR_CMD_TYPE) == XE_INSTR_MI) {
			num_dw = dump_mi_command(p, gt, dw, remaining_dw);
		} else if ((*dw & XE_INSTR_CMD_TYPE) == XE_INSTR_GFXPIPE) {
			num_dw = dump_gfxpipe_command(p, gt, dw, remaining_dw);
		} else if ((*dw & XE_INSTR_CMD_TYPE) == XE_INSTR_GFX_STATE) {
			num_dw = dump_gfx_state_command(p, gt, dw, remaining_dw);
		} else {
			num_dw = min(instr_dw(*dw), remaining_dw);
			drm_printf(p, "[%#10x] Unknown instruction of type %#x, likely %d dwords\n",
				   *dw, REG_FIELD_GET(XE_INSTR_CMD_TYPE, *dw),
				   num_dw);
		}

		dw += num_dw;
		remaining_dw -= num_dw;
	}
}

struct instr_state {
	u32 instr;
	u16 num_dw;
};

static const struct instr_state xe_hpg_svg_state[] = {
	{ .instr = CMD_3DSTATE_CONSTANT_VS, .num_dw = 11 },
	{ .instr = CMD_3DSTATE_CONSTANT_HS, .num_dw = 11 },
	{ .instr = CMD_3DSTATE_CONSTANT_DS, .num_dw = 11 },
	{ .instr = CMD_3DSTATE_CONSTANT_GS, .num_dw = 11 },
	{ .instr = CMD_3DSTATE_VERTEX_ELEMENTS, .num_dw = 69 },
	{ .instr = CMD_3DSTATE_VF_COMPONENT_PACKING, .num_dw = 5 },
	{ .instr = CMD_3DSTATE_VF_SGVS, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_VF_SGVS_2, .num_dw = 3 },
	{ .instr = CMD_3DSTATE_VS, .num_dw = 9 },
	{ .instr = CMD_3DSTATE_BINDING_TABLE_POINTERS_VS, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_SAMPLER_STATE_POINTERS_VS, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_URB_ALLOC_VS, .num_dw = 3 },
	{ .instr = CMD_3DSTATE_STREAMOUT, .num_dw = 5 },
	{ .instr = CMD_3DSTATE_SO_BUFFER_INDEX_0, .num_dw = 8 },
	{ .instr = CMD_3DSTATE_SO_BUFFER_INDEX_1, .num_dw = 8 },
	{ .instr = CMD_3DSTATE_SO_BUFFER_INDEX_2, .num_dw = 8 },
	{ .instr = CMD_3DSTATE_SO_BUFFER_INDEX_3, .num_dw = 8 },
	{ .instr = CMD_3DSTATE_CLIP, .num_dw = 4 },
	{ .instr = CMD_3DSTATE_PRIMITIVE_REPLICATION, .num_dw = 6 },
	{ .instr = CMD_3DSTATE_CLIP_MESH, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_SF, .num_dw = 4 },
	{ .instr = CMD_3DSTATE_SCISSOR_STATE_POINTERS, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_RASTER, .num_dw = 5 },
	{ .instr = CMD_3DSTATE_TBIMR_TILE_PASS_INFO, .num_dw = 4 },
	{ .instr = CMD_3DSTATE_WM_HZ_OP, .num_dw = 6 },
	{ .instr = CMD_3DSTATE_MULTISAMPLE, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_HS, .num_dw = 9 },
	{ .instr = CMD_3DSTATE_BINDING_TABLE_POINTERS_HS, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_SAMPLER_STATE_POINTERS_HS, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_URB_ALLOC_HS, .num_dw = 3 },
	{ .instr = CMD_3DSTATE_TASK_CONTROL, .num_dw = 3 },
	{ .instr = CMD_3DSTATE_TASK_SHADER, .num_dw = 7 },
	{ .instr = CMD_3DSTATE_TASK_SHADER_DATA, .num_dw = 10 },
	{ .instr = CMD_3DSTATE_URB_ALLOC_TASK, .num_dw = 3 },
	{ .instr = CMD_3DSTATE_TE, .num_dw = 5 },
	{ .instr = CMD_3DSTATE_TASK_REDISTRIB, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_DS, .num_dw = 11 },
	{ .instr = CMD_3DSTATE_BINDING_TABLE_POINTERS_DS, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_SAMPLER_STATE_POINTERS_DS, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_URB_ALLOC_DS, .num_dw = 3 },
	{ .instr = CMD_3DSTATE_GS, .num_dw = 10 },
	{ .instr = CMD_3DSTATE_BINDING_TABLE_POINTERS_GS, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_SAMPLER_STATE_POINTERS_GS, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_URB_ALLOC_GS, .num_dw = 3 },
	{ .instr = CMD_3DSTATE_MESH_CONTROL, .num_dw = 3 },
	{ .instr = CMD_3DSTATE_MESH_SHADER_DATA, .num_dw = 10 },
	{ .instr = CMD_3DSTATE_URB_ALLOC_MESH, .num_dw = 3 },
	{ .instr = CMD_3DSTATE_MESH_SHADER, .num_dw = 8 },
	{ .instr = CMD_3DSTATE_DRAWING_RECTANGLE, .num_dw = 4 },
};

void xe_lrc_emit_hwe_state_instructions(struct xe_exec_queue *q, struct xe_bb *bb)
{
	struct xe_gt *gt = q->hwe->gt;
	struct xe_device *xe = gt_to_xe(gt);
	const struct instr_state *state_table = NULL;
	int state_table_size = 0;

	/*
	 * Wa_14019789679
	 *
	 * If the driver doesn't explicitly emit the SVG instructions while
	 * setting up the default LRC, the context switch will write 0's
	 * (noops) into the LRC memory rather than the expected instruction
	 * headers. Application contexts start out as a copy of the default
	 * LRC, and if they also do not emit specific settings for some SVG
	 * state, then on context restore they'll unintentionally inherit
	 * whatever state setting the previous context had programmed into the
	 * hardware (i.e., the lack of a 3DSTATE_* instruction in the LRC will
	 * prevent the hardware from resetting that state back to any specific
	 * value).
	 *
	 * The official workaround only requires emitting 3DSTATE_MESH_CONTROL
	 * since that's a specific state setting that can easily cause GPU
	 * hangs if unintentionally inherited. However to be safe we'll
	 * continue to emit all of the SVG state since it's best not to leak
	 * any of the state between contexts, even if that leakage is harmless.
	 */
	if (XE_WA(gt, 14019789679) && q->hwe->class == XE_ENGINE_CLASS_RENDER) {
		state_table = xe_hpg_svg_state;
		state_table_size = ARRAY_SIZE(xe_hpg_svg_state);
	}

	if (!state_table) {
		xe_gt_dbg(gt, "No non-register state to emit on graphics ver %d.%02d\n",
			  GRAPHICS_VER(xe), GRAPHICS_VERx100(xe) % 100);
		return;
	}

	for (int i = 0; i < state_table_size; i++) {
		u32 instr = state_table[i].instr;
		u16 num_dw = state_table[i].num_dw;
		bool is_single_dw = ((instr & GFXPIPE_PIPELINE) == PIPELINE_SINGLE_DW);

		xe_gt_assert(gt, (instr & XE_INSTR_CMD_TYPE) == XE_INSTR_GFXPIPE);
		xe_gt_assert(gt, num_dw != 0);
		xe_gt_assert(gt, is_single_dw ^ (num_dw > 1));

		/*
		 * Xe2's SVG context is the same as the one on DG2 / MTL
		 * except that 3DSTATE_DRAWING_RECTANGLE (non-pipelined) has
		 * been replaced by 3DSTATE_DRAWING_RECTANGLE_FAST (pipelined).
		 * Just make the replacement here rather than defining a
		 * whole separate table for the single trivial change.
		 */
		if (GRAPHICS_VER(xe) >= 20 &&
		    instr == CMD_3DSTATE_DRAWING_RECTANGLE)
			instr = CMD_3DSTATE_DRAWING_RECTANGLE_FAST;

		bb->cs[bb->len] = instr;
		if (!is_single_dw)
			bb->cs[bb->len] |= (num_dw - 2);

		bb->len += num_dw;
	}
}

struct xe_lrc_snapshot *xe_lrc_snapshot_capture(struct xe_lrc *lrc)
{
	struct xe_lrc_snapshot *snapshot = kmalloc(sizeof(*snapshot), GFP_NOWAIT);

	if (!snapshot)
		return NULL;

	snapshot->context_desc = xe_lrc_ggtt_addr(lrc);
	snapshot->ring_addr = __xe_lrc_ring_ggtt_addr(lrc);
	snapshot->indirect_context_desc = xe_lrc_indirect_ring_ggtt_addr(lrc);
	snapshot->head = xe_lrc_ring_head(lrc);
	snapshot->tail.internal = lrc->ring.tail;
	snapshot->tail.memory = xe_lrc_ring_tail(lrc);
	snapshot->start = xe_lrc_ring_start(lrc);
	snapshot->start_seqno = xe_lrc_start_seqno(lrc);
	snapshot->seqno = xe_lrc_seqno(lrc);
	snapshot->lrc_bo = xe_bo_get(lrc->bo);
	snapshot->lrc_offset = xe_lrc_pphwsp_offset(lrc);
	snapshot->lrc_size = lrc->bo->size - snapshot->lrc_offset -
		LRC_WA_BB_SIZE;
	snapshot->lrc_snapshot = NULL;
	snapshot->ctx_timestamp = lower_32_bits(xe_lrc_ctx_timestamp(lrc));
	snapshot->ctx_job_timestamp = xe_lrc_ctx_job_timestamp(lrc);
	return snapshot;
}

void xe_lrc_snapshot_capture_delayed(struct xe_lrc_snapshot *snapshot)
{
	struct xe_bo *bo;
	struct iosys_map src;

	if (!snapshot)
		return;

	bo = snapshot->lrc_bo;
	snapshot->lrc_bo = NULL;

	snapshot->lrc_snapshot = kvmalloc(snapshot->lrc_size, GFP_KERNEL);
	if (!snapshot->lrc_snapshot)
		goto put_bo;

	xe_bo_lock(bo, false);
	if (!ttm_bo_vmap(&bo->ttm, &src)) {
		xe_map_memcpy_from(xe_bo_device(bo),
				   snapshot->lrc_snapshot, &src, snapshot->lrc_offset,
				   snapshot->lrc_size);
		ttm_bo_vunmap(&bo->ttm, &src);
	} else {
		kvfree(snapshot->lrc_snapshot);
		snapshot->lrc_snapshot = NULL;
	}
	xe_bo_unlock(bo);
put_bo:
	xe_bo_put(bo);
}

void xe_lrc_snapshot_print(struct xe_lrc_snapshot *snapshot, struct drm_printer *p)
{
	unsigned long i;

	if (!snapshot)
		return;

	drm_printf(p, "\tHW Context Desc: 0x%08x\n", snapshot->context_desc);
	drm_printf(p, "\tHW Ring address: 0x%08x\n",
		   snapshot->ring_addr);
	drm_printf(p, "\tHW Indirect Ring State: 0x%08x\n",
		   snapshot->indirect_context_desc);
	drm_printf(p, "\tLRC Head: (memory) %u\n", snapshot->head);
	drm_printf(p, "\tLRC Tail: (internal) %u, (memory) %u\n",
		   snapshot->tail.internal, snapshot->tail.memory);
	drm_printf(p, "\tRing start: (memory) 0x%08x\n", snapshot->start);
	drm_printf(p, "\tStart seqno: (memory) %d\n", snapshot->start_seqno);
	drm_printf(p, "\tSeqno: (memory) %d\n", snapshot->seqno);
	drm_printf(p, "\tTimestamp: 0x%08x\n", snapshot->ctx_timestamp);
	drm_printf(p, "\tJob Timestamp: 0x%08x\n", snapshot->ctx_job_timestamp);

	if (!snapshot->lrc_snapshot)
		return;

	drm_printf(p, "\t[HWSP].length: 0x%x\n", LRC_PPHWSP_SIZE);
	drm_puts(p, "\t[HWSP].data: ");
	for (i = 0; i < LRC_PPHWSP_SIZE; i += sizeof(u32)) {
		u32 *val = snapshot->lrc_snapshot + i;
		char dumped[ASCII85_BUFSZ];

		drm_puts(p, ascii85_encode(*val, dumped));
	}

	drm_printf(p, "\n\t[HWCTX].length: 0x%lx\n", snapshot->lrc_size - LRC_PPHWSP_SIZE);
	drm_puts(p, "\t[HWCTX].data: ");
	for (; i < snapshot->lrc_size; i += sizeof(u32)) {
		u32 *val = snapshot->lrc_snapshot + i;
		char dumped[ASCII85_BUFSZ];

		drm_puts(p, ascii85_encode(*val, dumped));
	}
	drm_puts(p, "\n");
}

void xe_lrc_snapshot_free(struct xe_lrc_snapshot *snapshot)
{
	if (!snapshot)
		return;

	kvfree(snapshot->lrc_snapshot);
	if (snapshot->lrc_bo)
		xe_bo_put(snapshot->lrc_bo);

	kfree(snapshot);
}

static int get_ctx_timestamp(struct xe_lrc *lrc, u32 engine_id, u64 *reg_ctx_ts)
{
	u16 class = REG_FIELD_GET(ENGINE_CLASS_ID, engine_id);
	u16 instance = REG_FIELD_GET(ENGINE_INSTANCE_ID, engine_id);
	struct xe_hw_engine *hwe;
	u64 val;

	hwe = xe_gt_hw_engine(lrc->gt, class, instance, false);
	if (xe_gt_WARN_ONCE(lrc->gt, !hwe || xe_hw_engine_is_reserved(hwe),
			    "Unexpected engine class:instance %d:%d for context utilization\n",
			    class, instance))
		return -1;

	if (lrc_to_xe(lrc)->info.has_64bit_timestamp)
		val = xe_mmio_read64_2x32(&hwe->gt->mmio,
					  RING_CTX_TIMESTAMP(hwe->mmio_base));
	else
		val = xe_mmio_read32(&hwe->gt->mmio,
				     RING_CTX_TIMESTAMP(hwe->mmio_base));

	*reg_ctx_ts = val;

	return 0;
}

/**
 * xe_lrc_update_timestamp() - Update ctx timestamp
 * @lrc: Pointer to the lrc.
 * @old_ts: Old timestamp value
 *
 * Populate @old_ts with the current saved ctx timestamp, read the new ctx
 * timestamp and update the saved value. With support for active contexts,
 * the calculation may be slightly racy, so follow a read-again logic to
 * ensure that the context is still active before returning the right
 * timestamp.
 *
 * Returns: New ctx timestamp value
 */
u64 xe_lrc_update_timestamp(struct xe_lrc *lrc, u64 *old_ts)
{
	u64 lrc_ts, reg_ts;
	u32 engine_id;

	*old_ts = lrc->ctx_timestamp;

	lrc_ts = xe_lrc_ctx_timestamp(lrc);
	/* CTX_TIMESTAMP mmio read is invalid on VF, so return the LRC value */
	if (IS_SRIOV_VF(lrc_to_xe(lrc))) {
		lrc->ctx_timestamp = lrc_ts;
		goto done;
	}

	if (lrc_ts == CONTEXT_ACTIVE) {
		engine_id = xe_lrc_engine_id(lrc);
		if (!get_ctx_timestamp(lrc, engine_id, &reg_ts))
			lrc->ctx_timestamp = reg_ts;

		/* read lrc again to ensure context is still active */
		lrc_ts = xe_lrc_ctx_timestamp(lrc);
	}

	/*
	 * If context switched out, just use the lrc_ts. Note that this needs to
	 * be a separate if condition.
	 */
	if (lrc_ts != CONTEXT_ACTIVE)
		lrc->ctx_timestamp = lrc_ts;

done:
	trace_xe_lrc_update_timestamp(lrc, *old_ts);

	return lrc->ctx_timestamp;
}

/**
 * xe_lrc_ring_is_idle() - LRC is idle
 * @lrc: Pointer to the lrc.
 *
 * Compare LRC ring head and tail to determine if idle.
 *
 * Return: True if the ring is idle, False otherwise
 */
bool xe_lrc_ring_is_idle(struct xe_lrc *lrc)
{
	return xe_lrc_ring_head(lrc) == xe_lrc_ring_tail(lrc);
}