// SPDX-License-Identifier: MIT
/*
 * Copyright © 2021 Intel Corporation
 */

#include "xe_lrc.h"

#include "instructions/xe_mi_commands.h"
#include "instructions/xe_gfxpipe_commands.h"
#include "regs/xe_engine_regs.h"
#include "regs/xe_gpu_commands.h"
#include "regs/xe_lrc_layout.h"
#include "xe_bb.h"
#include "xe_bo.h"
#include "xe_device.h"
#include "xe_drm_client.h"
#include "xe_exec_queue_types.h"
#include "xe_gt.h"
#include "xe_gt_printk.h"
#include "xe_hw_fence.h"
#include "xe_map.h"
#include "xe_vm.h"

#define LRC_VALID			(1 << 0)
#define LRC_PRIVILEGE			(1 << 8)
#define LRC_ADDRESSING_MODE_SHIFT	3
#define LRC_LEGACY_64B_CONTEXT		3

#define ENGINE_CLASS_SHIFT		61
#define ENGINE_INSTANCE_SHIFT		48

static struct xe_device *
lrc_to_xe(struct xe_lrc *lrc)
{
	return gt_to_xe(lrc->fence_ctx.gt);
}

size_t xe_lrc_size(struct xe_device *xe, enum xe_engine_class class)
{
	switch (class) {
	case XE_ENGINE_CLASS_RENDER:
		if (GRAPHICS_VER(xe) >= 20)
			return 4 * SZ_4K;
		else
			return 14 * SZ_4K;
	case XE_ENGINE_CLASS_COMPUTE:
		/* 14 pages since graphics_ver == 11 */
		if (GRAPHICS_VER(xe) >= 20)
			return 3 * SZ_4K;
		else
			return 14 * SZ_4K;
	default:
		WARN(1, "Unknown engine class: %d", class);
		fallthrough;
	case XE_ENGINE_CLASS_COPY:
	case XE_ENGINE_CLASS_VIDEO_DECODE:
	case XE_ENGINE_CLASS_VIDEO_ENHANCE:
	case XE_ENGINE_CLASS_OTHER:
		return 2 * SZ_4K;
	}
}

/*
 * The per-platform tables are u8-encoded in @data. Decode @data and write the
 * commands and register offsets into @regs. The following encoding is used
 * for each byte. There are two steps: decoding commands and decoding
 * addresses.
 *
 * Commands:
 * [7]: create NOPs - the number of NOPs is given in the lower bits
 * [6]: when creating an MI_LOAD_REGISTER_IMM command, allows
 *      MI_LRI_FORCE_POSTED to be set
 * [5:0]: number of NOPs, or number of registers written by the
 *        MI_LOAD_REGISTER_IMM
 *
 * Addresses: these are decoded after an MI_LOAD_REGISTER_IMM command, "count"
 * registers at a time. They are set using the REG/REG16 macros: the former is
 * used for offsets smaller than 0x200 while the latter is for larger values.
 * Those macros already set all the bits documented below correctly:
 *
 * [7]: when a register offset needs more than 6 bits, additional bytes
 *      follow, carrying the lower bits
 * [6:0]: register offset, without considering the engine base.
 *
 * This function only fills in the commands and register offsets. Values are
 * not filled out.
 */
static void set_offsets(u32 *regs,
			const u8 *data,
			const struct xe_hw_engine *hwe)
#define NOP(x) (BIT(7) | (x))
#define LRI(count, flags) ((flags) << 6 | (count) | \
			   BUILD_BUG_ON_ZERO(count >= BIT(6)))
#define POSTED BIT(0)
#define REG(x) (((x) >> 2) | BUILD_BUG_ON_ZERO(x >= 0x200))
#define REG16(x) \
	(((x) >> 9) | BIT(7) | BUILD_BUG_ON_ZERO(x >= 0x10000)), \
	(((x) >> 2) & 0x7f)
#define END 0
{
	const u32 base = hwe->mmio_base;

	while (*data) {
		u8 count, flags;

		if (*data & BIT(7)) { /* skip */
			count = *data++ & ~BIT(7);
			regs += count;
			continue;
		}

		count = *data & 0x3f;
		flags = *data >> 6;
		data++;

		*regs = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(count);
		if (flags & POSTED)
			*regs |= MI_LRI_FORCE_POSTED;
		*regs |= MI_LRI_LRM_CS_MMIO;
		regs++;

		xe_gt_assert(hwe->gt, count);
		do {
			u32 offset = 0;
			u8 v;

			do {
				v = *data++;
				offset <<= 7;
				offset |= v & ~BIT(7);
			} while (v & BIT(7));

			regs[0] = base + (offset << 2);
			regs += 2;
		} while (--count);
	}

	*regs = MI_BATCH_BUFFER_END | BIT(0);
}
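
/*
 * Worked example (illustrative only): the table bytes LRI(2, POSTED),
 * REG(0x034), REG16(0x2b4) decode to a single MI_LOAD_REGISTER_IMM header
 * for two registers with MI_LRI_FORCE_POSTED set, followed by the register
 * offsets base + 0x34 and base + 0x2b4, each with the following dword left
 * untouched so the register value can be filled in later.
 */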

static const u8 gen12_xcs_offsets[] = {
	NOP(1),
	LRI(13, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),
	REG(0x180),
	REG16(0x2b4),

	NOP(5),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	END
};

static const u8 dg2_xcs_offsets[] = {
	NOP(1),
	LRI(15, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),
	REG(0x180),
	REG16(0x2b4),
	REG(0x120),
	REG(0x124),

	NOP(1),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	END
};

static const u8 gen12_rcs_offsets[] = {
	NOP(1),
	LRI(13, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),
	REG(0x180),
	REG16(0x2b4),

	NOP(5),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	LRI(3, POSTED),
	REG(0x1b0),
	REG16(0x5a8),
	REG16(0x5ac),

	NOP(6),
	LRI(1, 0),
	REG(0x0c8),
	NOP(3 + 9 + 1),

	LRI(51, POSTED),
	REG16(0x588),
	REG16(0x588),
	REG16(0x588),
	REG16(0x588),
	REG16(0x588),
	REG16(0x588),
	REG(0x028),
	REG(0x09c),
	REG(0x0c0),
	REG(0x178),
	REG(0x17c),
	REG16(0x358),
	REG(0x170),
	REG(0x150),
	REG(0x154),
	REG(0x158),
	REG16(0x41c),
	REG16(0x600),
	REG16(0x604),
	REG16(0x608),
	REG16(0x60c),
	REG16(0x610),
	REG16(0x614),
	REG16(0x618),
	REG16(0x61c),
	REG16(0x620),
	REG16(0x624),
	REG16(0x628),
	REG16(0x62c),
	REG16(0x630),
	REG16(0x634),
	REG16(0x638),
	REG16(0x63c),
	REG16(0x640),
	REG16(0x644),
	REG16(0x648),
	REG16(0x64c),
	REG16(0x650),
	REG16(0x654),
	REG16(0x658),
	REG16(0x65c),
	REG16(0x660),
	REG16(0x664),
	REG16(0x668),
	REG16(0x66c),
	REG16(0x670),
	REG16(0x674),
	REG16(0x678),
	REG16(0x67c),
	REG(0x068),
	REG(0x084),
	NOP(1),

	END
};

static const u8 xehp_rcs_offsets[] = {
	NOP(1),
	LRI(13, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),
	REG(0x180),
	REG16(0x2b4),

	NOP(5),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	LRI(3, POSTED),
	REG(0x1b0),
	REG16(0x5a8),
	REG16(0x5ac),

	NOP(6),
	LRI(1, 0),
	REG(0x0c8),

	END
};

static const u8 dg2_rcs_offsets[] = {
	NOP(1),
	LRI(15, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),
	REG(0x180),
	REG16(0x2b4),
	REG(0x120),
	REG(0x124),

	NOP(1),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	LRI(3, POSTED),
	REG(0x1b0),
	REG16(0x5a8),
	REG16(0x5ac),

	NOP(6),
	LRI(1, 0),
	REG(0x0c8),

	END
};

static const u8 mtl_rcs_offsets[] = {
	NOP(1),
	LRI(15, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),
	REG(0x180),
	REG16(0x2b4),
	REG(0x120),
	REG(0x124),

	NOP(1),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	NOP(2),
	LRI(2, POSTED),
	REG16(0x5a8),
	REG16(0x5ac),

	NOP(6),
	LRI(1, 0),
	REG(0x0c8),

	END
};

#define XE2_CTX_COMMON \
	NOP(1),			/* [0x00] */ \
	LRI(15, POSTED),	/* [0x01] */ \
	REG16(0x244),		/* [0x02] CTXT_SR_CTL */ \
	REG(0x034),		/* [0x04] RING_BUFFER_HEAD */ \
	REG(0x030),		/* [0x06] RING_BUFFER_TAIL */ \
	REG(0x038),		/* [0x08] RING_BUFFER_START */ \
	REG(0x03c),		/* [0x0a] RING_BUFFER_CONTROL */ \
	REG(0x168),		/* [0x0c] BB_ADDR_UDW */ \
	REG(0x140),		/* [0x0e] BB_ADDR */ \
	REG(0x110),		/* [0x10] BB_STATE */ \
	REG(0x1c0),		/* [0x12] BB_PER_CTX_PTR */ \
	REG(0x1c4),		/* [0x14] RCS_INDIRECT_CTX */ \
	REG(0x1c8),		/* [0x16] RCS_INDIRECT_CTX_OFFSET */ \
	REG(0x180),		/* [0x18] CCID */ \
	REG16(0x2b4),		/* [0x1a] SEMAPHORE_TOKEN */ \
	REG(0x120),		/* [0x1c] PRT_BB_STATE */ \
	REG(0x124),		/* [0x1e] PRT_BB_STATE_UDW */ \
	\
	NOP(1),			/* [0x20] */ \
	LRI(9, POSTED),		/* [0x21] */ \
	REG16(0x3a8),		/* [0x22] CTX_TIMESTAMP */ \
	REG16(0x3ac),		/* [0x24] CTX_TIMESTAMP_UDW */ \
	REG(0x108),		/* [0x26] INDIRECT_RING_STATE */ \
	REG16(0x284),		/* [0x28] dummy reg */ \
	REG16(0x280),		/* [0x2a] CS_ACC_CTR_THOLD */ \
	REG16(0x27c),		/* [0x2c] CS_CTX_SYS_PASID */ \
	REG16(0x278),		/* [0x2e] CS_CTX_ASID */ \
	REG16(0x274),		/* [0x30] PTBP_UDW */ \
	REG16(0x270)		/* [0x32] PTBP_LDW */

static const u8 xe2_rcs_offsets[] = {
	XE2_CTX_COMMON,

	NOP(2),			/* [0x34] */
	LRI(2, POSTED),		/* [0x36] */
	REG16(0x5a8),		/* [0x37] CONTEXT_SCHEDULING_ATTRIBUTES */
	REG16(0x5ac),		/* [0x39] PREEMPTION_STATUS */

	NOP(6),			/* [0x41] */
	LRI(1, 0),		/* [0x47] */
	REG(0x0c8),		/* [0x48] R_PWR_CLK_STATE */

	END
};

static const u8 xe2_bcs_offsets[] = {
	XE2_CTX_COMMON,

	NOP(4 + 8 + 1),		/* [0x34] */
	LRI(2, POSTED),		/* [0x41] */
	REG16(0x200),		/* [0x42] BCS_SWCTRL */
	REG16(0x204),		/* [0x44] BLIT_CCTL */

	END
};

static const u8 xe2_xcs_offsets[] = {
	XE2_CTX_COMMON,

	END
};

#undef END
#undef REG16
#undef REG
#undef LRI
#undef NOP

static const u8 *reg_offsets(struct xe_device *xe, enum xe_engine_class class)
{
	if (class == XE_ENGINE_CLASS_RENDER) {
		if (GRAPHICS_VER(xe) >= 20)
			return xe2_rcs_offsets;
		else if (GRAPHICS_VERx100(xe) >= 1270)
			return mtl_rcs_offsets;
		else if (GRAPHICS_VERx100(xe) >= 1255)
			return dg2_rcs_offsets;
		else if (GRAPHICS_VERx100(xe) >= 1250)
			return xehp_rcs_offsets;
		else
			return gen12_rcs_offsets;
	} else if (class == XE_ENGINE_CLASS_COPY) {
		if (GRAPHICS_VER(xe) >= 20)
			return xe2_bcs_offsets;
		else
			return gen12_xcs_offsets;
	} else {
		if (GRAPHICS_VER(xe) >= 20)
			return xe2_xcs_offsets;
		else if (GRAPHICS_VERx100(xe) >= 1255)
			return dg2_xcs_offsets;
		else
			return gen12_xcs_offsets;
	}
}

static void set_context_control(u32 *regs, struct xe_hw_engine *hwe)
{
	regs[CTX_CONTEXT_CONTROL] = _MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH) |
				    _MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT) |
				    CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT;

	/* TODO: Timestamp */
}

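/*
 * Dword offset within the LRC register state of the LRI entry that loads
 * MI_MODE; reset_stop_ring() pokes the value dword one past this offset to
 * clear the STOP_RING bit in the saved context image.
 */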
static int lrc_ring_mi_mode(struct xe_hw_engine *hwe)
{
	struct xe_device *xe = gt_to_xe(hwe->gt);

	if (GRAPHICS_VERx100(xe) >= 1250)
		return 0x70;
	else
		return 0x60;
}

static void reset_stop_ring(u32 *regs, struct xe_hw_engine *hwe)
{
	int x;

	x = lrc_ring_mi_mode(hwe);
	regs[x + 1] &= ~STOP_RING;
	regs[x + 1] |= STOP_RING << 16;
}

static inline u32 __xe_lrc_ring_offset(struct xe_lrc *lrc)
{
	return 0;
}

u32 xe_lrc_pphwsp_offset(struct xe_lrc *lrc)
{
	return lrc->ring.size;
}

/* Make the magic macros work */
#define __xe_lrc_pphwsp_offset xe_lrc_pphwsp_offset

#define LRC_SEQNO_PPHWSP_OFFSET		512
#define LRC_START_SEQNO_PPHWSP_OFFSET	(LRC_SEQNO_PPHWSP_OFFSET + 8)
#define LRC_PARALLEL_PPHWSP_OFFSET	2048
#define LRC_PPHWSP_SIZE			SZ_4K
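
/*
 * Summary of the resulting LRC BO layout (derived from the offsets above and
 * the helpers below): the ring buffer occupies the first ring.size bytes,
 * followed by the 4K PPHWSP whose driver-defined area holds the seqno at
 * byte 512, the start seqno at byte 520 and the parallel scratch area at
 * byte 2048; the context register state begins immediately after the PPHWSP.
 */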

static size_t lrc_reg_size(struct xe_device *xe)
{
	if (GRAPHICS_VERx100(xe) >= 1250)
		return 96 * sizeof(u32);
	else
		return 80 * sizeof(u32);
}

size_t xe_lrc_skip_size(struct xe_device *xe)
{
	return LRC_PPHWSP_SIZE + lrc_reg_size(xe);
}

static inline u32 __xe_lrc_seqno_offset(struct xe_lrc *lrc)
{
	/* The seqno is stored in the driver-defined portion of PPHWSP */
	return xe_lrc_pphwsp_offset(lrc) + LRC_SEQNO_PPHWSP_OFFSET;
}

static inline u32 __xe_lrc_start_seqno_offset(struct xe_lrc *lrc)
{
	/* The start seqno is stored in the driver-defined portion of PPHWSP */
	return xe_lrc_pphwsp_offset(lrc) + LRC_START_SEQNO_PPHWSP_OFFSET;
}

static inline u32 __xe_lrc_parallel_offset(struct xe_lrc *lrc)
{
	/* The parallel is stored in the driver-defined portion of PPHWSP */
	return xe_lrc_pphwsp_offset(lrc) + LRC_PARALLEL_PPHWSP_OFFSET;
}

static inline u32 __xe_lrc_regs_offset(struct xe_lrc *lrc)
{
	return xe_lrc_pphwsp_offset(lrc) + LRC_PPHWSP_SIZE;
}

#define DECL_MAP_ADDR_HELPERS(elem) \
static inline struct iosys_map __xe_lrc_##elem##_map(struct xe_lrc *lrc) \
{ \
	struct iosys_map map = lrc->bo->vmap; \
\
	xe_assert(lrc_to_xe(lrc), !iosys_map_is_null(&map)); \
	iosys_map_incr(&map, __xe_lrc_##elem##_offset(lrc)); \
	return map; \
} \
static inline u32 __xe_lrc_##elem##_ggtt_addr(struct xe_lrc *lrc) \
{ \
	return xe_bo_ggtt_addr(lrc->bo) + __xe_lrc_##elem##_offset(lrc); \
} \

DECL_MAP_ADDR_HELPERS(ring)
DECL_MAP_ADDR_HELPERS(pphwsp)
DECL_MAP_ADDR_HELPERS(seqno)
DECL_MAP_ADDR_HELPERS(regs)
DECL_MAP_ADDR_HELPERS(start_seqno)
DECL_MAP_ADDR_HELPERS(parallel)

#undef DECL_MAP_ADDR_HELPERS
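
/*
 * For example, DECL_MAP_ADDR_HELPERS(seqno) above expands to
 * __xe_lrc_seqno_map(), an iosys_map into lrc->bo->vmap advanced by
 * __xe_lrc_seqno_offset(), and __xe_lrc_seqno_ggtt_addr(), the matching
 * GGTT address of that location.
 */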

u32 xe_lrc_ggtt_addr(struct xe_lrc *lrc)
{
	return __xe_lrc_pphwsp_ggtt_addr(lrc);
}

u32 xe_lrc_read_ctx_reg(struct xe_lrc *lrc, int reg_nr)
{
	struct xe_device *xe = lrc_to_xe(lrc);
	struct iosys_map map;

	map = __xe_lrc_regs_map(lrc);
	iosys_map_incr(&map, reg_nr * sizeof(u32));
	return xe_map_read32(xe, &map);
}

void xe_lrc_write_ctx_reg(struct xe_lrc *lrc, int reg_nr, u32 val)
{
	struct xe_device *xe = lrc_to_xe(lrc);
	struct iosys_map map;

	map = __xe_lrc_regs_map(lrc);
	iosys_map_incr(&map, reg_nr * sizeof(u32));
	xe_map_write32(xe, &map, val);
}

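/*
 * Allocate a zeroed CPU copy of the LRC image for @hwe and fill in the
 * register state that follows the PPHWSP with the per-class register
 * offsets and default control values. Returns NULL on allocation failure;
 * the caller frees the buffer with kfree().
 */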
static void *empty_lrc_data(struct xe_hw_engine *hwe)
{
	struct xe_device *xe = gt_to_xe(hwe->gt);
	void *data;
	u32 *regs;

	data = kzalloc(xe_lrc_size(xe, hwe->class), GFP_KERNEL);
	if (!data)
		return NULL;

	/* 1st page: Per-Process HW Status Page */
	regs = data + LRC_PPHWSP_SIZE;
	set_offsets(regs, reg_offsets(xe, hwe->class), hwe);
	set_context_control(regs, hwe);
	reset_stop_ring(regs, hwe);

	return data;
}

static void xe_lrc_set_ppgtt(struct xe_lrc *lrc, struct xe_vm *vm)
{
	u64 desc = xe_vm_pdp4_descriptor(vm, lrc->tile);

	xe_lrc_write_ctx_reg(lrc, CTX_PDP0_UDW, upper_32_bits(desc));
	xe_lrc_write_ctx_reg(lrc, CTX_PDP0_LDW, lower_32_bits(desc));
}

#define PVC_CTX_ASID		(0x2e + 1)
#define PVC_CTX_ACC_CTR_THOLD	(0x2a + 1)

int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe,
		struct xe_exec_queue *q, struct xe_vm *vm, u32 ring_size)
{
	struct xe_gt *gt = hwe->gt;
	struct xe_tile *tile = gt_to_tile(gt);
	struct xe_device *xe = gt_to_xe(gt);
	struct iosys_map map;
	void *init_data = NULL;
	u32 arb_enable;
	int err;

	lrc->flags = 0;

	/*
	 * FIXME: Perma-pinning LRC as we don't yet support moving GGTT address
	 * via VM bind calls.
	 */
	lrc->bo = xe_bo_create_pin_map(xe, tile, vm,
				       ring_size + xe_lrc_size(xe, hwe->class),
				       ttm_bo_type_kernel,
				       XE_BO_CREATE_VRAM_IF_DGFX(tile) |
				       XE_BO_CREATE_GGTT_BIT);
	if (IS_ERR(lrc->bo))
		return PTR_ERR(lrc->bo);

	lrc->tile = gt_to_tile(hwe->gt);
	lrc->ring.size = ring_size;
	lrc->ring.tail = 0;

	xe_hw_fence_ctx_init(&lrc->fence_ctx, hwe->gt,
			     hwe->fence_irq, hwe->name);

	if (!gt->default_lrc[hwe->class]) {
		init_data = empty_lrc_data(hwe);
		if (!init_data) {
			err = -ENOMEM;
			goto err_lrc_finish;
		}
	}

	/*
	 * Init the Per-Process HW Status Page (PPHWSP) and the LRC / context
	 * state to known values.
	 */
	map = __xe_lrc_pphwsp_map(lrc);
	if (!init_data) {
		xe_map_memset(xe, &map, 0, 0, LRC_PPHWSP_SIZE);	/* PPHWSP */
		xe_map_memcpy_to(xe, &map, LRC_PPHWSP_SIZE,
				 gt->default_lrc[hwe->class] + LRC_PPHWSP_SIZE,
				 xe_lrc_size(xe, hwe->class) - LRC_PPHWSP_SIZE);
	} else {
		xe_map_memcpy_to(xe, &map, 0, init_data,
				 xe_lrc_size(xe, hwe->class));
		kfree(init_data);
	}

	if (vm) {
		xe_lrc_set_ppgtt(lrc, vm);

		if (vm->xef)
			xe_drm_client_add_bo(vm->xef->client, lrc->bo);
	}

	xe_lrc_write_ctx_reg(lrc, CTX_RING_START, __xe_lrc_ring_ggtt_addr(lrc));
	xe_lrc_write_ctx_reg(lrc, CTX_RING_HEAD, 0);
	xe_lrc_write_ctx_reg(lrc, CTX_RING_TAIL, lrc->ring.tail);
	xe_lrc_write_ctx_reg(lrc, CTX_RING_CTL,
			     RING_CTL_SIZE(lrc->ring.size) | RING_VALID);
	if (xe->info.has_asid && vm)
		xe_lrc_write_ctx_reg(lrc, PVC_CTX_ASID, vm->usm.asid);

	lrc->desc = LRC_VALID;
	lrc->desc |= LRC_LEGACY_64B_CONTEXT << LRC_ADDRESSING_MODE_SHIFT;
	/* TODO: Priority */

	/*
	 * While this appears to have something about privileged batches or
	 * some such, it really just means PPGTT mode.
	 */
	if (vm)
		lrc->desc |= LRC_PRIVILEGE;

	if (GRAPHICS_VERx100(xe) < 1250) {
		lrc->desc |= (u64)hwe->instance << ENGINE_INSTANCE_SHIFT;
		lrc->desc |= (u64)hwe->class << ENGINE_CLASS_SHIFT;
	}

	arb_enable = MI_ARB_ON_OFF | MI_ARB_ENABLE;
	xe_lrc_write_ring(lrc, &arb_enable, sizeof(arb_enable));

	map = __xe_lrc_seqno_map(lrc);
	xe_map_write32(lrc_to_xe(lrc), &map, lrc->fence_ctx.next_seqno - 1);

	map = __xe_lrc_start_seqno_map(lrc);
	xe_map_write32(lrc_to_xe(lrc), &map, lrc->fence_ctx.next_seqno - 1);

	return 0;

err_lrc_finish:
	xe_lrc_finish(lrc);
	return err;
}

void xe_lrc_finish(struct xe_lrc *lrc)
{
	xe_hw_fence_ctx_finish(&lrc->fence_ctx);
	xe_bo_lock(lrc->bo, false);
	xe_bo_unpin(lrc->bo);
	xe_bo_unlock(lrc->bo);
	xe_bo_put(lrc->bo);
}

void xe_lrc_set_ring_head(struct xe_lrc *lrc, u32 head)
{
	xe_lrc_write_ctx_reg(lrc, CTX_RING_HEAD, head);
}

u32 xe_lrc_ring_head(struct xe_lrc *lrc)
{
	return xe_lrc_read_ctx_reg(lrc, CTX_RING_HEAD) & HEAD_ADDR;
}

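/*
 * Bytes that can currently be written between the ring tail and the last
 * known head; for example an empty 4 KiB ring (head == tail) reports the
 * full 4096 bytes as available.
 */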
u32 xe_lrc_ring_space(struct xe_lrc *lrc)
{
	const u32 head = xe_lrc_ring_head(lrc);
	const u32 tail = lrc->ring.tail;
	const u32 size = lrc->ring.size;

	return ((head - tail - 1) & (size - 1)) + 1;
}

static void __xe_lrc_write_ring(struct xe_lrc *lrc, struct iosys_map ring,
				const void *data, size_t size)
{
	struct xe_device *xe = lrc_to_xe(lrc);

	iosys_map_incr(&ring, lrc->ring.tail);
	xe_map_memcpy_to(xe, &ring, 0, data, size);
	lrc->ring.tail = (lrc->ring.tail + size) & (lrc->ring.size - 1);
}

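/*
 * Copy @size bytes of @data into the ring at the current tail, splitting the
 * copy in two when it would run past the end of the ring and padding with an
 * MI_NOOP when needed to keep the tail qword-aligned.
 */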
void xe_lrc_write_ring(struct xe_lrc *lrc, const void *data, size_t size)
{
	struct xe_device *xe = lrc_to_xe(lrc);
	struct iosys_map ring;
	u32 rhs;
	size_t aligned_size;

	xe_assert(xe, IS_ALIGNED(size, 4));
	aligned_size = ALIGN(size, 8);

	ring = __xe_lrc_ring_map(lrc);

	xe_assert(xe, lrc->ring.tail < lrc->ring.size);
	rhs = lrc->ring.size - lrc->ring.tail;
	if (size > rhs) {
		__xe_lrc_write_ring(lrc, ring, data, rhs);
		__xe_lrc_write_ring(lrc, ring, data + rhs, size - rhs);
	} else {
		__xe_lrc_write_ring(lrc, ring, data, size);
	}

	if (aligned_size > size) {
		u32 noop = MI_NOOP;

		__xe_lrc_write_ring(lrc, ring, &noop, sizeof(noop));
	}
}

u64 xe_lrc_descriptor(struct xe_lrc *lrc)
{
	return lrc->desc | xe_lrc_ggtt_addr(lrc);
}

u32 xe_lrc_seqno_ggtt_addr(struct xe_lrc *lrc)
{
	return __xe_lrc_seqno_ggtt_addr(lrc);
}

struct dma_fence *xe_lrc_create_seqno_fence(struct xe_lrc *lrc)
{
	return &xe_hw_fence_create(&lrc->fence_ctx,
				   __xe_lrc_seqno_map(lrc))->dma;
}

s32 xe_lrc_seqno(struct xe_lrc *lrc)
{
	struct iosys_map map = __xe_lrc_seqno_map(lrc);

	return xe_map_read32(lrc_to_xe(lrc), &map);
}

s32 xe_lrc_start_seqno(struct xe_lrc *lrc)
{
	struct iosys_map map = __xe_lrc_start_seqno_map(lrc);

	return xe_map_read32(lrc_to_xe(lrc), &map);
}

u32 xe_lrc_start_seqno_ggtt_addr(struct xe_lrc *lrc)
{
	return __xe_lrc_start_seqno_ggtt_addr(lrc);
}

u32 xe_lrc_parallel_ggtt_addr(struct xe_lrc *lrc)
{
	return __xe_lrc_parallel_ggtt_addr(lrc);
}

struct iosys_map xe_lrc_parallel_map(struct xe_lrc *lrc)
{
	return __xe_lrc_parallel_map(lrc);
}

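/*
 * Number of dwords occupied by the instruction whose header dword is
 * @cmd_header. For example, a header whose 7:0 length field reads 3
 * describes a 5-dword instruction, since the field encodes "total - 2".
 */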
static int instr_dw(u32 cmd_header)
{
	/* GFXPIPE "SINGLE_DW" opcodes are a single dword */
	if ((cmd_header & (XE_INSTR_CMD_TYPE | GFXPIPE_PIPELINE)) ==
	    GFXPIPE_SINGLE_DW_CMD(0, 0))
		return 1;

	/* 3DSTATE_SO_DECL_LIST has a 9-bit dword length rather than 8 */
	if ((cmd_header & GFXPIPE_MATCH_MASK) == CMD_3DSTATE_SO_DECL_LIST)
		return REG_FIELD_GET(CMD_3DSTATE_SO_DECL_LIST_DW_LEN, cmd_header) + 2;

	/* Most instructions have the # of dwords (minus 2) in 7:0 */
	return REG_FIELD_GET(XE_INSTR_LEN_MASK, cmd_header) + 2;
}

static int dump_mi_command(struct drm_printer *p,
			   struct xe_gt *gt,
			   u32 *dw,
			   int remaining_dw)
{
	u32 inst_header = *dw;
	u32 numdw = instr_dw(inst_header);
	u32 opcode = REG_FIELD_GET(MI_OPCODE, inst_header);
	int num_noop;

	/* First check for commands that don't have/use a '# DW' field */
	switch (inst_header & MI_OPCODE) {
	case MI_NOOP:
		num_noop = 1;
		while (num_noop < remaining_dw &&
		       (*(++dw) & REG_GENMASK(31, 23)) == MI_NOOP)
			num_noop++;
		drm_printf(p, "[%#010x] MI_NOOP (%d dwords)\n", inst_header, num_noop);
		return num_noop;

	case MI_TOPOLOGY_FILTER:
		drm_printf(p, "[%#010x] MI_TOPOLOGY_FILTER\n", inst_header);
		return 1;

	case MI_BATCH_BUFFER_END:
		drm_printf(p, "[%#010x] MI_BATCH_BUFFER_END\n", inst_header);
		/* Return 'remaining_dw' to consume the rest of the LRC */
		return remaining_dw;
	}

	/*
	 * Any remaining commands include a # of dwords. We should make sure
	 * it doesn't exceed the remaining size of the LRC.
	 */
	if (xe_gt_WARN_ON(gt, numdw > remaining_dw))
		numdw = remaining_dw;

	switch (inst_header & MI_OPCODE) {
	case MI_LOAD_REGISTER_IMM:
		drm_printf(p, "[%#010x] MI_LOAD_REGISTER_IMM: %d regs\n",
			   inst_header, (numdw - 1) / 2);
		for (int i = 1; i < numdw; i += 2)
			drm_printf(p, " - %#6x = %#010x\n", dw[i], dw[i + 1]);
		return numdw;

	case MI_FORCE_WAKEUP:
		drm_printf(p, "[%#010x] MI_FORCE_WAKEUP\n", inst_header);
		return numdw;

	default:
		drm_printf(p, "[%#010x] unknown MI opcode %#x, likely %d dwords\n",
			   inst_header, opcode, numdw);
		return numdw;
	}
}

static int dump_gfxpipe_command(struct drm_printer *p,
				struct xe_gt *gt,
				u32 *dw,
				int remaining_dw)
{
	u32 numdw = instr_dw(*dw);
	u32 pipeline = REG_FIELD_GET(GFXPIPE_PIPELINE, *dw);
	u32 opcode = REG_FIELD_GET(GFXPIPE_OPCODE, *dw);
	u32 subopcode = REG_FIELD_GET(GFXPIPE_SUBOPCODE, *dw);

	/*
	 * Make sure we haven't mis-parsed a number of dwords that exceeds the
	 * remaining size of the LRC.
	 */
	if (xe_gt_WARN_ON(gt, numdw > remaining_dw))
		numdw = remaining_dw;

	switch (*dw & GFXPIPE_MATCH_MASK) {
#define MATCH(cmd) \
	case cmd: \
		drm_printf(p, "[%#010x] " #cmd " (%d dwords)\n", *dw, numdw); \
		return numdw
#define MATCH3D(cmd) \
	case CMD_##cmd: \
		drm_printf(p, "[%#010x] " #cmd " (%d dwords)\n", *dw, numdw); \
		return numdw

	MATCH(STATE_BASE_ADDRESS);
	MATCH(STATE_SIP);
	MATCH(GPGPU_CSR_BASE_ADDRESS);
	MATCH(STATE_COMPUTE_MODE);
	MATCH3D(3DSTATE_BTD);

	MATCH3D(3DSTATE_VF_STATISTICS);

	MATCH(PIPELINE_SELECT);

	MATCH3D(3DSTATE_DRAWING_RECTANGLE_FAST);
	MATCH3D(3DSTATE_CLEAR_PARAMS);
	MATCH3D(3DSTATE_DEPTH_BUFFER);
	MATCH3D(3DSTATE_STENCIL_BUFFER);
	MATCH3D(3DSTATE_HIER_DEPTH_BUFFER);
	MATCH3D(3DSTATE_VERTEX_BUFFERS);
	MATCH3D(3DSTATE_VERTEX_ELEMENTS);
	MATCH3D(3DSTATE_INDEX_BUFFER);
	MATCH3D(3DSTATE_VF);
	MATCH3D(3DSTATE_MULTISAMPLE);
	MATCH3D(3DSTATE_CC_STATE_POINTERS);
	MATCH3D(3DSTATE_SCISSOR_STATE_POINTERS);
	MATCH3D(3DSTATE_VS);
	MATCH3D(3DSTATE_GS);
	MATCH3D(3DSTATE_CLIP);
	MATCH3D(3DSTATE_SF);
	MATCH3D(3DSTATE_WM);
	MATCH3D(3DSTATE_CONSTANT_VS);
	MATCH3D(3DSTATE_CONSTANT_GS);
	MATCH3D(3DSTATE_SAMPLE_MASK);
	MATCH3D(3DSTATE_CONSTANT_HS);
	MATCH3D(3DSTATE_CONSTANT_DS);
	MATCH3D(3DSTATE_HS);
	MATCH3D(3DSTATE_TE);
	MATCH3D(3DSTATE_DS);
	MATCH3D(3DSTATE_STREAMOUT);
	MATCH3D(3DSTATE_SBE);
	MATCH3D(3DSTATE_PS);
	MATCH3D(3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP);
	MATCH3D(3DSTATE_CPS_POINTERS);
	MATCH3D(3DSTATE_VIEWPORT_STATE_POINTERS_CC);
	MATCH3D(3DSTATE_BLEND_STATE_POINTERS);
	MATCH3D(3DSTATE_BINDING_TABLE_POINTERS_VS);
	MATCH3D(3DSTATE_BINDING_TABLE_POINTERS_HS);
	MATCH3D(3DSTATE_BINDING_TABLE_POINTERS_DS);
	MATCH3D(3DSTATE_BINDING_TABLE_POINTERS_GS);
	MATCH3D(3DSTATE_BINDING_TABLE_POINTERS_PS);
	MATCH3D(3DSTATE_SAMPLER_STATE_POINTERS_VS);
	MATCH3D(3DSTATE_SAMPLER_STATE_POINTERS_HS);
	MATCH3D(3DSTATE_SAMPLER_STATE_POINTERS_DS);
	MATCH3D(3DSTATE_SAMPLER_STATE_POINTERS_GS);
	MATCH3D(3DSTATE_SAMPLER_STATE_POINTERS_PS);
	MATCH3D(3DSTATE_VF_INSTANCING);
	MATCH3D(3DSTATE_VF_SGVS);
	MATCH3D(3DSTATE_VF_TOPOLOGY);
	MATCH3D(3DSTATE_WM_CHROMAKEY);
	MATCH3D(3DSTATE_PS_BLEND);
	MATCH3D(3DSTATE_WM_DEPTH_STENCIL);
	MATCH3D(3DSTATE_PS_EXTRA);
	MATCH3D(3DSTATE_RASTER);
	MATCH3D(3DSTATE_SBE_SWIZ);
	MATCH3D(3DSTATE_WM_HZ_OP);
	MATCH3D(3DSTATE_VF_COMPONENT_PACKING);
	MATCH3D(3DSTATE_VF_SGVS_2);
	MATCH3D(3DSTATE_VFG);
	MATCH3D(3DSTATE_URB_ALLOC_VS);
	MATCH3D(3DSTATE_URB_ALLOC_HS);
	MATCH3D(3DSTATE_URB_ALLOC_DS);
	MATCH3D(3DSTATE_URB_ALLOC_GS);
	MATCH3D(3DSTATE_SO_BUFFER_INDEX_0);
	MATCH3D(3DSTATE_SO_BUFFER_INDEX_1);
	MATCH3D(3DSTATE_SO_BUFFER_INDEX_2);
	MATCH3D(3DSTATE_SO_BUFFER_INDEX_3);
	MATCH3D(3DSTATE_PRIMITIVE_REPLICATION);
	MATCH3D(3DSTATE_TBIMR_TILE_PASS_INFO);
	MATCH3D(3DSTATE_AMFS);
	MATCH3D(3DSTATE_DEPTH_BOUNDS);
	MATCH3D(3DSTATE_AMFS_TEXTURE_POINTERS);
	MATCH3D(3DSTATE_CONSTANT_TS_POINTER);
	MATCH3D(3DSTATE_MESH_CONTROL);
	MATCH3D(3DSTATE_MESH_DISTRIB);
	MATCH3D(3DSTATE_TASK_REDISTRIB);
	MATCH3D(3DSTATE_MESH_SHADER);
	MATCH3D(3DSTATE_MESH_SHADER_DATA);
	MATCH3D(3DSTATE_TASK_CONTROL);
	MATCH3D(3DSTATE_TASK_SHADER);
	MATCH3D(3DSTATE_TASK_SHADER_DATA);
	MATCH3D(3DSTATE_URB_ALLOC_MESH);
	MATCH3D(3DSTATE_URB_ALLOC_TASK);
	MATCH3D(3DSTATE_CLIP_MESH);
	MATCH3D(3DSTATE_SBE_MESH);
	MATCH3D(3DSTATE_CPSIZE_CONTROL_BUFFER);

	MATCH3D(3DSTATE_DRAWING_RECTANGLE);
	MATCH3D(3DSTATE_CHROMA_KEY);
	MATCH3D(3DSTATE_POLY_STIPPLE_OFFSET);
	MATCH3D(3DSTATE_POLY_STIPPLE_PATTERN);
	MATCH3D(3DSTATE_LINE_STIPPLE);
	MATCH3D(3DSTATE_AA_LINE_PARAMETERS);
	MATCH3D(3DSTATE_MONOFILTER_SIZE);
	MATCH3D(3DSTATE_PUSH_CONSTANT_ALLOC_VS);
	MATCH3D(3DSTATE_PUSH_CONSTANT_ALLOC_HS);
	MATCH3D(3DSTATE_PUSH_CONSTANT_ALLOC_DS);
	MATCH3D(3DSTATE_PUSH_CONSTANT_ALLOC_GS);
	MATCH3D(3DSTATE_PUSH_CONSTANT_ALLOC_PS);
	MATCH3D(3DSTATE_SO_DECL_LIST);
	MATCH3D(3DSTATE_SO_BUFFER);
	MATCH3D(3DSTATE_BINDING_TABLE_POOL_ALLOC);
	MATCH3D(3DSTATE_SAMPLE_PATTERN);
	MATCH3D(3DSTATE_3D_MODE);
	MATCH3D(3DSTATE_SUBSLICE_HASH_TABLE);
	MATCH3D(3DSTATE_SLICE_TABLE_STATE_POINTERS);
	MATCH3D(3DSTATE_PTBR_TILE_PASS_INFO);

	default:
		drm_printf(p, "[%#010x] unknown GFXPIPE command (pipeline=%#x, opcode=%#x, subopcode=%#x), likely %d dwords\n",
			   *dw, pipeline, opcode, subopcode, numdw);
		return numdw;
	}
}

void xe_lrc_dump_default(struct drm_printer *p,
			 struct xe_gt *gt,
			 enum xe_engine_class hwe_class)
{
	u32 *dw;
	int remaining_dw, num_dw;

	if (!gt->default_lrc[hwe_class]) {
		drm_printf(p, "No default LRC for class %d\n", hwe_class);
		return;
	}

	/*
	 * Skip the beginning of the LRC since it contains the per-process
	 * hardware status page.
	 */
	dw = gt->default_lrc[hwe_class] + LRC_PPHWSP_SIZE;
	remaining_dw = (xe_lrc_size(gt_to_xe(gt), hwe_class) - LRC_PPHWSP_SIZE) / 4;

	while (remaining_dw > 0) {
		if ((*dw & XE_INSTR_CMD_TYPE) == XE_INSTR_MI) {
			num_dw = dump_mi_command(p, gt, dw, remaining_dw);
		} else if ((*dw & XE_INSTR_CMD_TYPE) == XE_INSTR_GFXPIPE) {
			num_dw = dump_gfxpipe_command(p, gt, dw, remaining_dw);
		} else {
			num_dw = min(instr_dw(*dw), remaining_dw);
			drm_printf(p, "[%#10x] Unknown instruction of type %#x, likely %d dwords\n",
				   *dw, REG_FIELD_GET(XE_INSTR_CMD_TYPE, *dw),
				   num_dw);
		}

		dw += num_dw;
		remaining_dw -= num_dw;
	}
}

struct instr_state {
	u32 instr;
	u16 num_dw;
};

static const struct instr_state xe_hpg_svg_state[] = {
	{ .instr = CMD_3DSTATE_CONSTANT_VS, .num_dw = 11 },
	{ .instr = CMD_3DSTATE_CONSTANT_HS, .num_dw = 11 },
	{ .instr = CMD_3DSTATE_CONSTANT_DS, .num_dw = 11 },
	{ .instr = CMD_3DSTATE_CONSTANT_GS, .num_dw = 11 },
	{ .instr = CMD_3DSTATE_VERTEX_ELEMENTS, .num_dw = 69 },
	{ .instr = CMD_3DSTATE_VF_COMPONENT_PACKING, .num_dw = 5 },
	{ .instr = CMD_3DSTATE_VF_SGVS, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_VF_SGVS_2, .num_dw = 3 },
	{ .instr = CMD_3DSTATE_VS, .num_dw = 9 },
	{ .instr = CMD_3DSTATE_BINDING_TABLE_POINTERS_VS, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_SAMPLER_STATE_POINTERS_VS, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_URB_ALLOC_VS, .num_dw = 3 },
	{ .instr = CMD_3DSTATE_STREAMOUT, .num_dw = 5 },
	{ .instr = CMD_3DSTATE_SO_BUFFER_INDEX_0, .num_dw = 8 },
	{ .instr = CMD_3DSTATE_SO_BUFFER_INDEX_1, .num_dw = 8 },
	{ .instr = CMD_3DSTATE_SO_BUFFER_INDEX_2, .num_dw = 8 },
	{ .instr = CMD_3DSTATE_SO_BUFFER_INDEX_3, .num_dw = 8 },
	{ .instr = CMD_3DSTATE_CLIP, .num_dw = 4 },
	{ .instr = CMD_3DSTATE_PRIMITIVE_REPLICATION, .num_dw = 6 },
	{ .instr = CMD_3DSTATE_CLIP_MESH, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_SF, .num_dw = 4 },
	{ .instr = CMD_3DSTATE_SCISSOR_STATE_POINTERS, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_RASTER, .num_dw = 5 },
	{ .instr = CMD_3DSTATE_TBIMR_TILE_PASS_INFO, .num_dw = 4 },
	{ .instr = CMD_3DSTATE_WM_HZ_OP, .num_dw = 6 },
	{ .instr = CMD_3DSTATE_MULTISAMPLE, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_HS, .num_dw = 9 },
	{ .instr = CMD_3DSTATE_BINDING_TABLE_POINTERS_HS, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_SAMPLER_STATE_POINTERS_HS, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_URB_ALLOC_HS, .num_dw = 3 },
	{ .instr = CMD_3DSTATE_TASK_CONTROL, .num_dw = 3 },
	{ .instr = CMD_3DSTATE_TASK_SHADER, .num_dw = 7 },
	{ .instr = CMD_3DSTATE_TASK_SHADER_DATA, .num_dw = 10 },
	{ .instr = CMD_3DSTATE_URB_ALLOC_TASK, .num_dw = 3 },
	{ .instr = CMD_3DSTATE_TE, .num_dw = 5 },
	{ .instr = CMD_3DSTATE_TASK_REDISTRIB, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_DS, .num_dw = 11 },
	{ .instr = CMD_3DSTATE_BINDING_TABLE_POINTERS_DS, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_SAMPLER_STATE_POINTERS_DS, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_URB_ALLOC_DS, .num_dw = 3 },
	{ .instr = CMD_3DSTATE_GS, .num_dw = 10 },
	{ .instr = CMD_3DSTATE_BINDING_TABLE_POINTERS_GS, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_SAMPLER_STATE_POINTERS_GS, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_URB_ALLOC_GS, .num_dw = 3 },
	{ .instr = CMD_3DSTATE_MESH_CONTROL, .num_dw = 3 },
	{ .instr = CMD_3DSTATE_MESH_SHADER_DATA, .num_dw = 10 },
	{ .instr = CMD_3DSTATE_URB_ALLOC_MESH, .num_dw = 3 },
	{ .instr = CMD_3DSTATE_MESH_SHADER, .num_dw = 8 },
	{ .instr = CMD_3DSTATE_DRAWING_RECTANGLE, .num_dw = 4 },
};

void xe_lrc_emit_hwe_state_instructions(struct xe_exec_queue *q, struct xe_bb *bb)
{
	struct xe_gt *gt = q->hwe->gt;
	struct xe_device *xe = gt_to_xe(gt);
	const struct instr_state *state_table = NULL;
	int state_table_size = 0;

	/*
	 * At the moment we only need to emit non-register state for the RCS
	 * engine.
	 */
	if (q->hwe->class != XE_ENGINE_CLASS_RENDER)
		return;

	switch (GRAPHICS_VERx100(xe)) {
	case 1255:
	case 1270 ... 2004:
		state_table = xe_hpg_svg_state;
		state_table_size = ARRAY_SIZE(xe_hpg_svg_state);
		break;
	default:
		xe_gt_dbg(gt, "No non-register state to emit on graphics ver %d.%02d\n",
			  GRAPHICS_VER(xe), GRAPHICS_VERx100(xe) % 100);
		return;
	}

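	/*
	 * Only the header dword of each state command is written below;
	 * bb->len is advanced past the whole instruction, so the payload
	 * dwords keep whatever the batch buffer was initialised with
	 * (assumed here to be zeroes for a freshly allocated BB).
	 */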
	for (int i = 0; i < state_table_size; i++) {
		u32 instr = state_table[i].instr;
		u16 num_dw = state_table[i].num_dw;
		bool is_single_dw = ((instr & GFXPIPE_PIPELINE) == PIPELINE_SINGLE_DW);

		xe_gt_assert(gt, (instr & XE_INSTR_CMD_TYPE) == XE_INSTR_GFXPIPE);
		xe_gt_assert(gt, num_dw != 0);
		xe_gt_assert(gt, is_single_dw ^ (num_dw > 1));

		/*
		 * Xe2's SVG context is the same as the one on DG2 / MTL
		 * except that 3DSTATE_DRAWING_RECTANGLE (non-pipelined) has
		 * been replaced by 3DSTATE_DRAWING_RECTANGLE_FAST (pipelined).
		 * Just make the replacement here rather than defining a
		 * whole separate table for the single trivial change.
		 */
		if (GRAPHICS_VER(xe) >= 20 &&
		    instr == CMD_3DSTATE_DRAWING_RECTANGLE)
			instr = CMD_3DSTATE_DRAWING_RECTANGLE_FAST;

		bb->cs[bb->len] = instr;
		if (!is_single_dw)
			bb->cs[bb->len] |= (num_dw - 2);

		bb->len += num_dw;
	}
}
1264}