6e7b70532d111c87932bdb33dcc7f9621ea55baf
[linux-2.6-block.git] / drivers / gpu / drm / xe / xe_lrc.c
1 // SPDX-License-Identifier: MIT
2 /*
3  * Copyright © 2021 Intel Corporation
4  */
5
6 #include "xe_lrc.h"
7
8 #include <generated/xe_wa_oob.h>
9
10 #include <linux/ascii85.h>
11
12 #include "instructions/xe_mi_commands.h"
13 #include "instructions/xe_gfxpipe_commands.h"
14 #include "instructions/xe_gfx_state_commands.h"
15 #include "regs/xe_engine_regs.h"
16 #include "regs/xe_lrc_layout.h"
17 #include "xe_bb.h"
18 #include "xe_bo.h"
19 #include "xe_device.h"
20 #include "xe_drm_client.h"
21 #include "xe_exec_queue_types.h"
22 #include "xe_gt.h"
23 #include "xe_gt_printk.h"
24 #include "xe_hw_fence.h"
25 #include "xe_map.h"
26 #include "xe_memirq.h"
27 #include "xe_mmio.h"
28 #include "xe_sriov.h"
29 #include "xe_trace_lrc.h"
30 #include "xe_vm.h"
31 #include "xe_wa.h"
32
33 #define LRC_VALID                               BIT_ULL(0)
34 #define LRC_PRIVILEGE                           BIT_ULL(8)
35 #define LRC_ADDRESSING_MODE                     GENMASK_ULL(4, 3)
36 #define LRC_LEGACY_64B_CONTEXT                  3
37
38 #define LRC_ENGINE_CLASS                        GENMASK_ULL(63, 61)
39 #define LRC_ENGINE_INSTANCE                     GENMASK_ULL(53, 48)
40
41 #define LRC_PPHWSP_SIZE                         SZ_4K
42 #define LRC_INDIRECT_RING_STATE_SIZE            SZ_4K
43 #define LRC_WA_BB_SIZE                          SZ_4K
44
45 static struct xe_device *
46 lrc_to_xe(struct xe_lrc *lrc)
47 {
48         return gt_to_xe(lrc->fence_ctx.gt);
49 }
50
51 size_t xe_gt_lrc_size(struct xe_gt *gt, enum xe_engine_class class)
52 {
53         struct xe_device *xe = gt_to_xe(gt);
54         size_t size;
55
56         /* Per-process HW status page (PPHWSP) */
57         size = LRC_PPHWSP_SIZE;
58
59         /* Engine context image */
60         switch (class) {
61         case XE_ENGINE_CLASS_RENDER:
62                 if (GRAPHICS_VER(xe) >= 20)
63                         size += 3 * SZ_4K;
64                 else
65                         size += 13 * SZ_4K;
66                 break;
67         case XE_ENGINE_CLASS_COMPUTE:
68                 if (GRAPHICS_VER(xe) >= 20)
69                         size += 2 * SZ_4K;
70                 else
71                         size += 13 * SZ_4K;
72                 break;
73         default:
74                 WARN(1, "Unknown engine class: %d", class);
75                 fallthrough;
76         case XE_ENGINE_CLASS_COPY:
77         case XE_ENGINE_CLASS_VIDEO_DECODE:
78         case XE_ENGINE_CLASS_VIDEO_ENHANCE:
79         case XE_ENGINE_CLASS_OTHER:
80                 size += 1 * SZ_4K;
81         }
82
83         /* Add indirect ring state page */
84         if (xe_gt_has_indirect_ring_state(gt))
85                 size += LRC_INDIRECT_RING_STATE_SIZE;
86
87         return size;
88 }
89
90 /*
91  * The per-platform tables are u8-encoded in @data. Decode @data and set the
92  * addresses' offset and commands in @regs. The following encoding is used
93  * for each byte. There are 2 steps: decoding commands and decoding addresses.
94  *
95  * Commands:
96  * [7]: create NOPs - number of NOPs are set in lower bits
97  * [6]: When creating MI_LOAD_REGISTER_IMM command, allow to set
98  *      MI_LRI_FORCE_POSTED
99  * [5:0]: Number of NOPs or registers to set values to in case of
100  *        MI_LOAD_REGISTER_IMM
101  *
102  * Addresses: these are decoded after a MI_LOAD_REGISTER_IMM command by "count"
103  * number of registers. They are set by using the REG/REG16 macros: the former
104  * is used for offsets smaller than 0x200 while the latter is for values bigger
105  * than that. Those macros already set all the bits documented below correctly:
106  *
107  * [7]: When a register offset needs more than 6 bits, use additional bytes, to
108  *      follow, for the lower bits
109  * [6:0]: Register offset, without considering the engine base.
110  *
111  * This function only tweaks the commands and register offsets. Values are not
112  * filled out.
113  */
114 static void set_offsets(u32 *regs,
115                         const u8 *data,
116                         const struct xe_hw_engine *hwe)
117 #define NOP(x) (BIT(7) | (x))
118 #define LRI(count, flags) ((flags) << 6 | (count) | \
119                            BUILD_BUG_ON_ZERO(count >= BIT(6)))
120 #define POSTED BIT(0)
121 #define REG(x) (((x) >> 2) | BUILD_BUG_ON_ZERO(x >= 0x200))
122 #define REG16(x) \
123         (((x) >> 9) | BIT(7) | BUILD_BUG_ON_ZERO(x >= 0x10000)), \
124         (((x) >> 2) & 0x7f)
125 {
126         const u32 base = hwe->mmio_base;
127
128         while (*data) {
129                 u8 count, flags;
130
131                 if (*data & BIT(7)) { /* skip */
132                         count = *data++ & ~BIT(7);
133                         regs += count;
134                         continue;
135                 }
136
137                 count = *data & 0x3f;
138                 flags = *data >> 6;
139                 data++;
140
141                 *regs = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(count);
142                 if (flags & POSTED)
143                         *regs |= MI_LRI_FORCE_POSTED;
144                 *regs |= MI_LRI_LRM_CS_MMIO;
145                 regs++;
146
147                 xe_gt_assert(hwe->gt, count);
148                 do {
149                         u32 offset = 0;
150                         u8 v;
151
152                         do {
153                                 v = *data++;
154                                 offset <<= 7;
155                                 offset |= v & ~BIT(7);
156                         } while (v & BIT(7));
157
158                         regs[0] = base + (offset << 2);
159                         regs += 2;
160                 } while (--count);
161         }
162
163         *regs = MI_BATCH_BUFFER_END | BIT(0);
164 }
165
/*
 * Encoded context register layout (see set_offsets() for the encoding).
 * reg_offsets() returns this table for the copy class when GRAPHICS_VER is
 * below 20, and for the remaining non-render classes when GRAPHICS_VER is
 * below 20 and GRAPHICS_VERx100 is below 1255.
 *
 * Bracketed numbers are the dword index at which the command is decoded.
 */
static const u8 gen12_xcs_offsets[] = {
	NOP(1),			/* [0x00] */
	LRI(13, POSTED),	/* [0x01] */
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),
	REG(0x180),
	REG16(0x2b4),

	NOP(5),			/* [0x1c] */
	LRI(9, POSTED),		/* [0x21] */
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	0			/* end of table */
};
197
/*
 * Encoded context register layout (see set_offsets() for the encoding).
 * reg_offsets() returns this table for classes other than render and copy
 * when GRAPHICS_VER is below 20 and GRAPHICS_VERx100 is at least 1255.
 *
 * Bracketed numbers are the dword index at which the command is decoded.
 */
static const u8 dg2_xcs_offsets[] = {
	NOP(1),			/* [0x00] */
	LRI(15, POSTED),	/* [0x01] */
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),
	REG(0x180),
	REG16(0x2b4),
	REG(0x120),
	REG(0x124),

	NOP(1),			/* [0x20] */
	LRI(9, POSTED),		/* [0x21] */
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	0			/* end of table */
};
231
/*
 * Encoded context register layout (see set_offsets() for the encoding).
 * reg_offsets() returns this table for the render class when none of its
 * newer-version checks match, i.e. GRAPHICS_VER below 20 and GRAPHICS_VERx100
 * below 1250.
 *
 * Bracketed numbers are the dword index at which the command is decoded.
 */
static const u8 gen12_rcs_offsets[] = {
	NOP(1),			/* [0x00] */
	LRI(13, POSTED),	/* [0x01] */
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),
	REG(0x180),
	REG16(0x2b4),

	NOP(5),			/* [0x1c] */
	LRI(9, POSTED),		/* [0x21] */
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	LRI(3, POSTED),		/* [0x34] */
	REG(0x1b0),
	REG16(0x5a8),
	REG16(0x5ac),

	NOP(6),			/* [0x3b] */
	LRI(1, 0),		/* [0x41] */
	REG(0x0c8),
	NOP(3 + 9 + 1),		/* [0x44] */

	LRI(51, POSTED),	/* [0x51] */
	REG16(0x588),
	REG16(0x588),
	REG16(0x588),
	REG16(0x588),
	REG16(0x588),
	REG16(0x588),
	REG(0x028),
	REG(0x09c),
	REG(0x0c0),
	REG(0x178),
	REG(0x17c),
	REG16(0x358),
	REG(0x170),
	REG(0x150),
	REG(0x154),
	REG(0x158),
	REG16(0x41c),
	REG16(0x600),
	REG16(0x604),
	REG16(0x608),
	REG16(0x60c),
	REG16(0x610),
	REG16(0x614),
	REG16(0x618),
	REG16(0x61c),
	REG16(0x620),
	REG16(0x624),
	REG16(0x628),
	REG16(0x62c),
	REG16(0x630),
	REG16(0x634),
	REG16(0x638),
	REG16(0x63c),
	REG16(0x640),
	REG16(0x644),
	REG16(0x648),
	REG16(0x64c),
	REG16(0x650),
	REG16(0x654),
	REG16(0x658),
	REG16(0x65c),
	REG16(0x660),
	REG16(0x664),
	REG16(0x668),
	REG16(0x66c),
	REG16(0x670),
	REG16(0x674),
	REG16(0x678),
	REG16(0x67c),
	REG(0x068),
	REG(0x084),
	NOP(1),			/* [0xb8] */

	0			/* end of table */
};
327
/*
 * Encoded context register layout (see set_offsets() for the encoding).
 * reg_offsets() returns this table for the render class when GRAPHICS_VER is
 * below 20 and GRAPHICS_VERx100 is at least 1250 but below 1255.
 *
 * Bracketed numbers are the dword index at which the command is decoded.
 */
static const u8 xehp_rcs_offsets[] = {
	NOP(1),			/* [0x00] */
	LRI(13, POSTED),	/* [0x01] */
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),
	REG(0x180),
	REG16(0x2b4),

	NOP(5),			/* [0x1c] */
	LRI(9, POSTED),		/* [0x21] */
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	LRI(3, POSTED),		/* [0x34] */
	REG(0x1b0),
	REG16(0x5a8),
	REG16(0x5ac),

	NOP(6),			/* [0x3b] */
	LRI(1, 0),		/* [0x41] */
	REG(0x0c8),

	0			/* end of table */
};
368
/*
 * Encoded context register layout (see set_offsets() for the encoding).
 * reg_offsets() returns this table for the render class when GRAPHICS_VER is
 * below 20 and GRAPHICS_VERx100 is at least 1255 but below 1270.
 *
 * Bracketed numbers are the dword index at which the command is decoded.
 */
static const u8 dg2_rcs_offsets[] = {
	NOP(1),			/* [0x00] */
	LRI(15, POSTED),	/* [0x01] */
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),
	REG(0x180),
	REG16(0x2b4),
	REG(0x120),
	REG(0x124),

	NOP(1),			/* [0x20] */
	LRI(9, POSTED),		/* [0x21] */
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	LRI(3, POSTED),		/* [0x34] */
	REG(0x1b0),
	REG16(0x5a8),
	REG16(0x5ac),

	NOP(6),			/* [0x3b] */
	LRI(1, 0),		/* [0x41] */
	REG(0x0c8),

	0			/* end of table */
};
411
/*
 * Encoded context register layout (see set_offsets() for the encoding).
 * reg_offsets() returns this table for the render class when GRAPHICS_VER is
 * below 20 and GRAPHICS_VERx100 is at least 1270.
 *
 * Bracketed numbers are the dword index at which the command is decoded.
 */
static const u8 mtl_rcs_offsets[] = {
	NOP(1),			/* [0x00] */
	LRI(15, POSTED),	/* [0x01] */
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),
	REG(0x180),
	REG16(0x2b4),
	REG(0x120),
	REG(0x124),

	NOP(1),			/* [0x20] */
	LRI(9, POSTED),		/* [0x21] */
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	NOP(2),			/* [0x34] */
	LRI(2, POSTED),		/* [0x36] */
	REG16(0x5a8),
	REG16(0x5ac),

	NOP(6),			/* [0x3b] */
	LRI(1, 0),		/* [0x41] */
	REG(0x0c8),

	0			/* end of table */
};
454
/*
 * Leading entries shared by the xe2_rcs/xe2_bcs/xe2_xcs offset tables. The
 * bracketed number on each line is the dword index, within the context
 * register state, at which set_offsets() decodes that entry; the common part
 * ends at index 0x33, so tables continue from [0x34].
 */
#define XE2_CTX_COMMON \
	NOP(1),                 /* [0x00] */ \
	LRI(15, POSTED),        /* [0x01] */ \
	REG16(0x244),           /* [0x02] CTXT_SR_CTL */ \
	REG(0x034),             /* [0x04] RING_BUFFER_HEAD */ \
	REG(0x030),             /* [0x06] RING_BUFFER_TAIL */ \
	REG(0x038),             /* [0x08] RING_BUFFER_START */ \
	REG(0x03c),             /* [0x0a] RING_BUFFER_CONTROL */ \
	REG(0x168),             /* [0x0c] BB_ADDR_UDW */ \
	REG(0x140),             /* [0x0e] BB_ADDR */ \
	REG(0x110),             /* [0x10] BB_STATE */ \
	REG(0x1c0),             /* [0x12] BB_PER_CTX_PTR */ \
	REG(0x1c4),             /* [0x14] RCS_INDIRECT_CTX */ \
	REG(0x1c8),             /* [0x16] RCS_INDIRECT_CTX_OFFSET */ \
	REG(0x180),             /* [0x18] CCID */ \
	REG16(0x2b4),           /* [0x1a] SEMAPHORE_TOKEN */ \
	REG(0x120),             /* [0x1c] PRT_BB_STATE */ \
	REG(0x124),             /* [0x1e] PRT_BB_STATE_UDW */ \
	\
	NOP(1),                 /* [0x20] */ \
	LRI(9, POSTED),         /* [0x21] */ \
	REG16(0x3a8),           /* [0x22] CTX_TIMESTAMP */ \
	REG16(0x3ac),           /* [0x24] CTX_TIMESTAMP_UDW */ \
	REG(0x108),             /* [0x26] INDIRECT_RING_STATE */ \
	REG16(0x284),           /* [0x28] dummy reg */ \
	REG16(0x280),           /* [0x2a] CS_ACC_CTR_THOLD */ \
	REG16(0x27c),           /* [0x2c] CS_CTX_SYS_PASID */ \
	REG16(0x278),           /* [0x2e] CS_CTX_ASID */ \
	REG16(0x274),           /* [0x30] PTBP_UDW */ \
	REG16(0x270)            /* [0x32] PTBP_LDW */
485
/*
 * Encoded context register layout returned by reg_offsets() for the render
 * class when GRAPHICS_VER is at least 20. Bracketed numbers are dword indices
 * as decoded by set_offsets(): NOP(n) advances n dwords, an LRI header takes
 * one dword and each register takes two.
 */
static const u8 xe2_rcs_offsets[] = {
	XE2_CTX_COMMON,

	NOP(2),                 /* [0x34] */
	LRI(2, POSTED),         /* [0x36] */
	REG16(0x5a8),           /* [0x37] CONTEXT_SCHEDULING_ATTRIBUTES */
	REG16(0x5ac),           /* [0x39] PREEMPTION_STATUS */

	NOP(6),                 /* [0x3b] */
	LRI(1, 0),              /* [0x41] */
	REG(0x0c8),             /* [0x42] R_PWR_CLK_STATE */

	0
};
500
/*
 * Encoded context register layout returned by reg_offsets() for the copy
 * class when GRAPHICS_VER is at least 20. Bracketed numbers are dword indices
 * as decoded by set_offsets().
 */
static const u8 xe2_bcs_offsets[] = {
	XE2_CTX_COMMON,

	NOP(4 + 8 + 1),         /* [0x34] */
	LRI(2, POSTED),         /* [0x41] */
	REG16(0x200),           /* [0x42] BCS_SWCTRL */
	REG16(0x204),           /* [0x44] BLIT_CCTL */

	0
};
511
/*
 * Encoded context register layout returned by reg_offsets() for classes other
 * than render and copy when GRAPHICS_VER is at least 20: only the common part.
 */
static const u8 xe2_xcs_offsets[] = {
	XE2_CTX_COMMON,

	0
};
517
/*
 * Encoded layout that empty_lrc_data() decodes into the indirect ring state
 * page when the GT has indirect ring state. Bracketed numbers are dword
 * indices as decoded by set_offsets(): NOP(n) advances n dwords, an LRI
 * header takes one dword and each register takes two.
 */
static const u8 xe2_indirect_ring_state_offsets[] = {
	NOP(1),                 /* [0x00] */
	LRI(5, POSTED),         /* [0x01] */
	REG(0x034),             /* [0x02] RING_BUFFER_HEAD */
	REG(0x030),             /* [0x04] RING_BUFFER_TAIL */
	REG(0x038),             /* [0x06] RING_BUFFER_START */
	REG(0x048),             /* [0x08] RING_BUFFER_START_UDW */
	REG(0x03c),             /* [0x0a] RING_BUFFER_CONTROL */

	NOP(5),                 /* [0x0c] */
	LRI(9, POSTED),         /* [0x11] */
	REG(0x168),             /* [0x12] BB_ADDR_UDW */
	REG(0x140),             /* [0x14] BB_ADDR */
	REG(0x110),             /* [0x16] BB_STATE */
	REG16(0x588),           /* [0x18] BB_STACK_WRITE_PORT */
	REG16(0x588),           /* [0x1a] BB_STACK_WRITE_PORT */
	REG16(0x588),           /* [0x1c] BB_STACK_WRITE_PORT */
	REG16(0x588),           /* [0x1e] BB_STACK_WRITE_PORT */
	REG16(0x588),           /* [0x20] BB_STACK_WRITE_PORT */
	REG16(0x588),           /* [0x22] BB_STACK_WRITE_PORT */

	NOP(12),                 /* [0x24] */

	0
};
543
/*
 * The table-building macros are only meant for the offset tables above.
 * Note that POSTED is not undefined at this point.
 */
#undef REG16
#undef REG
#undef LRI
#undef NOP
548
549 static const u8 *reg_offsets(struct xe_device *xe, enum xe_engine_class class)
550 {
551         if (class == XE_ENGINE_CLASS_RENDER) {
552                 if (GRAPHICS_VER(xe) >= 20)
553                         return xe2_rcs_offsets;
554                 else if (GRAPHICS_VERx100(xe) >= 1270)
555                         return mtl_rcs_offsets;
556                 else if (GRAPHICS_VERx100(xe) >= 1255)
557                         return dg2_rcs_offsets;
558                 else if (GRAPHICS_VERx100(xe) >= 1250)
559                         return xehp_rcs_offsets;
560                 else
561                         return gen12_rcs_offsets;
562         } else if (class == XE_ENGINE_CLASS_COPY) {
563                 if (GRAPHICS_VER(xe) >= 20)
564                         return xe2_bcs_offsets;
565                 else
566                         return gen12_xcs_offsets;
567         } else {
568                 if (GRAPHICS_VER(xe) >= 20)
569                         return xe2_xcs_offsets;
570                 else if (GRAPHICS_VERx100(xe) >= 1255)
571                         return dg2_xcs_offsets;
572                 else
573                         return gen12_xcs_offsets;
574         }
575 }
576
577 static void set_context_control(u32 *regs, struct xe_hw_engine *hwe)
578 {
579         regs[CTX_CONTEXT_CONTROL] = _MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH |
580                                                        CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT);
581
582         if (xe_gt_has_indirect_ring_state(hwe->gt))
583                 regs[CTX_CONTEXT_CONTROL] |=
584                         _MASKED_BIT_ENABLE(CTX_CTRL_INDIRECT_RING_STATE_ENABLE);
585
586         /* TODO: Timestamp */
587 }
588
589 static void set_memory_based_intr(u32 *regs, struct xe_hw_engine *hwe)
590 {
591         struct xe_memirq *memirq = &gt_to_tile(hwe->gt)->memirq;
592         struct xe_device *xe = gt_to_xe(hwe->gt);
593         u8 num_regs;
594
595         if (!xe_device_uses_memirq(xe))
596                 return;
597
598         regs[CTX_LRM_INT_MASK_ENABLE] = MI_LOAD_REGISTER_MEM |
599                                         MI_LRI_LRM_CS_MMIO | MI_LRM_USE_GGTT;
600         regs[CTX_INT_MASK_ENABLE_REG] = RING_IMR(0).addr;
601         regs[CTX_INT_MASK_ENABLE_PTR] = xe_memirq_enable_ptr(memirq);
602
603         num_regs = xe_device_has_msix(xe) ? 3 : 2;
604         regs[CTX_LRI_INT_REPORT_PTR] = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(num_regs) |
605                                        MI_LRI_LRM_CS_MMIO | MI_LRI_FORCE_POSTED;
606         regs[CTX_INT_STATUS_REPORT_REG] = RING_INT_STATUS_RPT_PTR(0).addr;
607         regs[CTX_INT_STATUS_REPORT_PTR] = xe_memirq_status_ptr(memirq, hwe);
608         regs[CTX_INT_SRC_REPORT_REG] = RING_INT_SRC_RPT_PTR(0).addr;
609         regs[CTX_INT_SRC_REPORT_PTR] = xe_memirq_source_ptr(memirq, hwe);
610
611         if (xe_device_has_msix(xe)) {
612                 regs[CTX_CS_INT_VEC_REG] = CS_INT_VEC(0).addr;
613                 /* CTX_CS_INT_VEC_DATA will be set in xe_lrc_init */
614         }
615 }
616
617 static int lrc_ring_mi_mode(struct xe_hw_engine *hwe)
618 {
619         struct xe_device *xe = gt_to_xe(hwe->gt);
620
621         if (GRAPHICS_VERx100(xe) >= 1250)
622                 return 0x70;
623         else
624                 return 0x60;
625 }
626
627 static void reset_stop_ring(u32 *regs, struct xe_hw_engine *hwe)
628 {
629         int x;
630
631         x = lrc_ring_mi_mode(hwe);
632         regs[x + 1] &= ~STOP_RING;
633         regs[x + 1] |= STOP_RING << 16;
634 }
635
636 static inline bool xe_lrc_has_indirect_ring_state(struct xe_lrc *lrc)
637 {
638         return lrc->flags & XE_LRC_FLAG_INDIRECT_RING_STATE;
639 }
640
641 static inline u32 __xe_lrc_ring_offset(struct xe_lrc *lrc)
642 {
643         return 0;
644 }
645
646 u32 xe_lrc_pphwsp_offset(struct xe_lrc *lrc)
647 {
648         return lrc->ring.size;
649 }
650
/*
 * Make the magic macros work: DECL_MAP_ADDR_HELPERS() pastes together
 * __xe_lrc_<elem>_offset, so give the two public offset helpers an alias that
 * follows that naming.
 */
#define __xe_lrc_pphwsp_offset xe_lrc_pphwsp_offset
#define __xe_lrc_regs_offset xe_lrc_regs_offset

/*
 * Byte offsets, relative to the start of the PPHWSP, of the values the
 * __xe_lrc_*_offset() helpers locate there.
 */
#define LRC_SEQNO_PPHWSP_OFFSET 512
#define LRC_START_SEQNO_PPHWSP_OFFSET (LRC_SEQNO_PPHWSP_OFFSET + 8)
#define LRC_CTX_JOB_TIMESTAMP_OFFSET (LRC_START_SEQNO_PPHWSP_OFFSET + 8)
#define LRC_PARALLEL_PPHWSP_OFFSET 2048
#define LRC_ENGINE_ID_PPHWSP_OFFSET 2096
660
661 u32 xe_lrc_regs_offset(struct xe_lrc *lrc)
662 {
663         return xe_lrc_pphwsp_offset(lrc) + LRC_PPHWSP_SIZE;
664 }
665
666 static size_t lrc_reg_size(struct xe_device *xe)
667 {
668         if (GRAPHICS_VERx100(xe) >= 1250)
669                 return 96 * sizeof(u32);
670         else
671                 return 80 * sizeof(u32);
672 }
673
674 size_t xe_lrc_skip_size(struct xe_device *xe)
675 {
676         return LRC_PPHWSP_SIZE + lrc_reg_size(xe);
677 }
678
679 static inline u32 __xe_lrc_seqno_offset(struct xe_lrc *lrc)
680 {
681         /* The seqno is stored in the driver-defined portion of PPHWSP */
682         return xe_lrc_pphwsp_offset(lrc) + LRC_SEQNO_PPHWSP_OFFSET;
683 }
684
685 static inline u32 __xe_lrc_start_seqno_offset(struct xe_lrc *lrc)
686 {
687         /* The start seqno is stored in the driver-defined portion of PPHWSP */
688         return xe_lrc_pphwsp_offset(lrc) + LRC_START_SEQNO_PPHWSP_OFFSET;
689 }
690
691 static u32 __xe_lrc_ctx_job_timestamp_offset(struct xe_lrc *lrc)
692 {
693         /* This is stored in the driver-defined portion of PPHWSP */
694         return xe_lrc_pphwsp_offset(lrc) + LRC_CTX_JOB_TIMESTAMP_OFFSET;
695 }
696
697 static inline u32 __xe_lrc_parallel_offset(struct xe_lrc *lrc)
698 {
699         /* The parallel is stored in the driver-defined portion of PPHWSP */
700         return xe_lrc_pphwsp_offset(lrc) + LRC_PARALLEL_PPHWSP_OFFSET;
701 }
702
703 static inline u32 __xe_lrc_engine_id_offset(struct xe_lrc *lrc)
704 {
705         return xe_lrc_pphwsp_offset(lrc) + LRC_ENGINE_ID_PPHWSP_OFFSET;
706 }
707
708 static u32 __xe_lrc_ctx_timestamp_offset(struct xe_lrc *lrc)
709 {
710         return __xe_lrc_regs_offset(lrc) + CTX_TIMESTAMP * sizeof(u32);
711 }
712
713 static u32 __xe_lrc_ctx_timestamp_udw_offset(struct xe_lrc *lrc)
714 {
715         return __xe_lrc_regs_offset(lrc) + CTX_TIMESTAMP_UDW * sizeof(u32);
716 }
717
718 static inline u32 __xe_lrc_indirect_ring_offset(struct xe_lrc *lrc)
719 {
720         /* Indirect ring state page is at the very end of LRC */
721         return lrc->size - LRC_INDIRECT_RING_STATE_SIZE;
722 }
723
/*
 * DECL_MAP_ADDR_HELPERS(elem) generates two accessors for the LRC element
 * whose byte offset is returned by __xe_lrc_<elem>_offset():
 *
 *  - __xe_lrc_<elem>_map(): a copy of lrc->bo's vmap advanced to the element;
 *    asserts that the BO mapping is not null.
 *  - __xe_lrc_<elem>_ggtt_addr(): the BO's GGTT address plus the same offset.
 *    Marked __maybe_unused so elements that never need it do not warn.
 */
#define DECL_MAP_ADDR_HELPERS(elem) \
static inline struct iosys_map __xe_lrc_##elem##_map(struct xe_lrc *lrc) \
{ \
	struct iosys_map map = lrc->bo->vmap; \
\
	xe_assert(lrc_to_xe(lrc), !iosys_map_is_null(&map));  \
	iosys_map_incr(&map, __xe_lrc_##elem##_offset(lrc)); \
	return map; \
} \
static inline u32 __maybe_unused __xe_lrc_##elem##_ggtt_addr(struct xe_lrc *lrc) \
{ \
	return xe_bo_ggtt_addr(lrc->bo) + __xe_lrc_##elem##_offset(lrc); \
} \

/* One pair of helpers per element that has an offset function */
DECL_MAP_ADDR_HELPERS(ring)
DECL_MAP_ADDR_HELPERS(pphwsp)
DECL_MAP_ADDR_HELPERS(seqno)
DECL_MAP_ADDR_HELPERS(regs)
DECL_MAP_ADDR_HELPERS(start_seqno)
DECL_MAP_ADDR_HELPERS(ctx_job_timestamp)
DECL_MAP_ADDR_HELPERS(ctx_timestamp)
DECL_MAP_ADDR_HELPERS(ctx_timestamp_udw)
DECL_MAP_ADDR_HELPERS(parallel)
DECL_MAP_ADDR_HELPERS(indirect_ring)
DECL_MAP_ADDR_HELPERS(engine_id)

#undef DECL_MAP_ADDR_HELPERS
751
752 /**
753  * xe_lrc_ctx_timestamp_ggtt_addr() - Get ctx timestamp GGTT address
754  * @lrc: Pointer to the lrc.
755  *
756  * Returns: ctx timestamp GGTT address
757  */
758 u32 xe_lrc_ctx_timestamp_ggtt_addr(struct xe_lrc *lrc)
759 {
760         return __xe_lrc_ctx_timestamp_ggtt_addr(lrc);
761 }
762
763 /**
764  * xe_lrc_ctx_timestamp_udw_ggtt_addr() - Get ctx timestamp udw GGTT address
765  * @lrc: Pointer to the lrc.
766  *
767  * Returns: ctx timestamp udw GGTT address
768  */
769 u32 xe_lrc_ctx_timestamp_udw_ggtt_addr(struct xe_lrc *lrc)
770 {
771         return __xe_lrc_ctx_timestamp_udw_ggtt_addr(lrc);
772 }
773
774 /**
775  * xe_lrc_ctx_timestamp() - Read ctx timestamp value
776  * @lrc: Pointer to the lrc.
777  *
778  * Returns: ctx timestamp value
779  */
780 u64 xe_lrc_ctx_timestamp(struct xe_lrc *lrc)
781 {
782         struct xe_device *xe = lrc_to_xe(lrc);
783         struct iosys_map map;
784         u32 ldw, udw = 0;
785
786         map = __xe_lrc_ctx_timestamp_map(lrc);
787         ldw = xe_map_read32(xe, &map);
788
789         if (xe->info.has_64bit_timestamp) {
790                 map = __xe_lrc_ctx_timestamp_udw_map(lrc);
791                 udw = xe_map_read32(xe, &map);
792         }
793
794         return (u64)udw << 32 | ldw;
795 }
796
797 /**
798  * xe_lrc_ctx_job_timestamp_ggtt_addr() - Get ctx job timestamp GGTT address
799  * @lrc: Pointer to the lrc.
800  *
801  * Returns: ctx timestamp job GGTT address
802  */
803 u32 xe_lrc_ctx_job_timestamp_ggtt_addr(struct xe_lrc *lrc)
804 {
805         return __xe_lrc_ctx_job_timestamp_ggtt_addr(lrc);
806 }
807
808 /**
809  * xe_lrc_ctx_job_timestamp() - Read ctx job timestamp value
810  * @lrc: Pointer to the lrc.
811  *
812  * Returns: ctx timestamp job value
813  */
814 u32 xe_lrc_ctx_job_timestamp(struct xe_lrc *lrc)
815 {
816         struct xe_device *xe = lrc_to_xe(lrc);
817         struct iosys_map map;
818
819         map = __xe_lrc_ctx_job_timestamp_map(lrc);
820         return xe_map_read32(xe, &map);
821 }
822
823 u32 xe_lrc_ggtt_addr(struct xe_lrc *lrc)
824 {
825         return __xe_lrc_pphwsp_ggtt_addr(lrc);
826 }
827
828 u32 xe_lrc_indirect_ring_ggtt_addr(struct xe_lrc *lrc)
829 {
830         if (!xe_lrc_has_indirect_ring_state(lrc))
831                 return 0;
832
833         return __xe_lrc_indirect_ring_ggtt_addr(lrc);
834 }
835
836 static u32 xe_lrc_read_indirect_ctx_reg(struct xe_lrc *lrc, int reg_nr)
837 {
838         struct xe_device *xe = lrc_to_xe(lrc);
839         struct iosys_map map;
840
841         map = __xe_lrc_indirect_ring_map(lrc);
842         iosys_map_incr(&map, reg_nr * sizeof(u32));
843         return xe_map_read32(xe, &map);
844 }
845
846 static void xe_lrc_write_indirect_ctx_reg(struct xe_lrc *lrc,
847                                           int reg_nr, u32 val)
848 {
849         struct xe_device *xe = lrc_to_xe(lrc);
850         struct iosys_map map;
851
852         map = __xe_lrc_indirect_ring_map(lrc);
853         iosys_map_incr(&map, reg_nr * sizeof(u32));
854         xe_map_write32(xe, &map, val);
855 }
856
857 u32 xe_lrc_read_ctx_reg(struct xe_lrc *lrc, int reg_nr)
858 {
859         struct xe_device *xe = lrc_to_xe(lrc);
860         struct iosys_map map;
861
862         map = __xe_lrc_regs_map(lrc);
863         iosys_map_incr(&map, reg_nr * sizeof(u32));
864         return xe_map_read32(xe, &map);
865 }
866
867 void xe_lrc_write_ctx_reg(struct xe_lrc *lrc, int reg_nr, u32 val)
868 {
869         struct xe_device *xe = lrc_to_xe(lrc);
870         struct iosys_map map;
871
872         map = __xe_lrc_regs_map(lrc);
873         iosys_map_incr(&map, reg_nr * sizeof(u32));
874         xe_map_write32(xe, &map, val);
875 }
876
877 static void *empty_lrc_data(struct xe_hw_engine *hwe)
878 {
879         struct xe_gt *gt = hwe->gt;
880         void *data;
881         u32 *regs;
882
883         data = kzalloc(xe_gt_lrc_size(gt, hwe->class), GFP_KERNEL);
884         if (!data)
885                 return NULL;
886
887         /* 1st page: Per-Process of HW status Page */
888         regs = data + LRC_PPHWSP_SIZE;
889         set_offsets(regs, reg_offsets(gt_to_xe(gt), hwe->class), hwe);
890         set_context_control(regs, hwe);
891         set_memory_based_intr(regs, hwe);
892         reset_stop_ring(regs, hwe);
893         if (xe_gt_has_indirect_ring_state(gt)) {
894                 regs = data + xe_gt_lrc_size(gt, hwe->class) -
895                        LRC_INDIRECT_RING_STATE_SIZE;
896                 set_offsets(regs, xe2_indirect_ring_state_offsets, hwe);
897         }
898
899         return data;
900 }
901
902 static void xe_lrc_set_ppgtt(struct xe_lrc *lrc, struct xe_vm *vm)
903 {
904         u64 desc = xe_vm_pdp4_descriptor(vm, gt_to_tile(lrc->gt));
905
906         xe_lrc_write_ctx_reg(lrc, CTX_PDP0_UDW, upper_32_bits(desc));
907         xe_lrc_write_ctx_reg(lrc, CTX_PDP0_LDW, lower_32_bits(desc));
908 }
909
910 static void xe_lrc_finish(struct xe_lrc *lrc)
911 {
912         xe_hw_fence_ctx_finish(&lrc->fence_ctx);
913         xe_bo_unpin_map_no_vm(lrc->bo);
914 }
915
916 static size_t wa_bb_offset(struct xe_lrc *lrc)
917 {
918         return lrc->bo->size - LRC_WA_BB_SIZE;
919 }
920
921 /*
922  * xe_lrc_setup_utilization() - Setup wa bb to assist in calculating active
923  * context run ticks.
924  * @lrc: Pointer to the lrc.
925  *
926  * Context Timestamp (CTX_TIMESTAMP) in the LRC accumulates the run ticks of the
927  * context, but only gets updated when the context switches out. In order to
928  * check how long a context has been active before it switches out, two things
929  * are required:
930  *
931  * (1) Determine if the context is running:
932  * To do so, we program the WA BB to set an initial value for CTX_TIMESTAMP in
933  * the LRC. The value chosen is 1 since 0 is the initial value when the LRC is
934  * initialized. During a query, we just check for this value to determine if the
935  * context is active. If the context switched out, it would overwrite this
936  * location with the actual CTX_TIMESTAMP MMIO value. Note that WA BB runs as
937  * the last part of context restore, so reusing this LRC location will not
938  * clobber anything.
939  *
940  * (2) Calculate the time that the context has been active for:
941  * The CTX_TIMESTAMP ticks only when the context is active. If a context is
942  * active, we just use the CTX_TIMESTAMP MMIO as the new value of utilization.
943  * While doing so, we need to read the CTX_TIMESTAMP MMIO for the specific
944  * engine instance. Since we do not know which instance the context is running
945  * on until it is scheduled, we also read the ENGINE_ID MMIO in the WA BB and
946  * store it in the PPHSWP.
947  */
948 #define CONTEXT_ACTIVE 1ULL
949 static int xe_lrc_setup_utilization(struct xe_lrc *lrc)
950 {
951         const size_t max_size = LRC_WA_BB_SIZE;
952         u32 *cmd, *buf = NULL;
953
954         if (lrc->bo->vmap.is_iomem) {
955                 buf = kmalloc(max_size, GFP_KERNEL);
956                 if (!buf)
957                         return -ENOMEM;
958                 cmd = buf;
959         } else {
960                 cmd = lrc->bo->vmap.vaddr + wa_bb_offset(lrc);
961         }
962
963         *cmd++ = MI_STORE_REGISTER_MEM | MI_SRM_USE_GGTT | MI_SRM_ADD_CS_OFFSET;
964         *cmd++ = ENGINE_ID(0).addr;
965         *cmd++ = __xe_lrc_engine_id_ggtt_addr(lrc);
966         *cmd++ = 0;
967
968         *cmd++ = MI_STORE_DATA_IMM | MI_SDI_GGTT | MI_SDI_NUM_DW(1);
969         *cmd++ = __xe_lrc_ctx_timestamp_ggtt_addr(lrc);
970         *cmd++ = 0;
971         *cmd++ = lower_32_bits(CONTEXT_ACTIVE);
972
973         if (lrc_to_xe(lrc)->info.has_64bit_timestamp) {
974                 *cmd++ = MI_STORE_DATA_IMM | MI_SDI_GGTT | MI_SDI_NUM_DW(1);
975                 *cmd++ = __xe_lrc_ctx_timestamp_udw_ggtt_addr(lrc);
976                 *cmd++ = 0;
977                 *cmd++ = upper_32_bits(CONTEXT_ACTIVE);
978         }
979
980         *cmd++ = MI_BATCH_BUFFER_END;
981
982         if (buf) {
983                 xe_map_memcpy_to(gt_to_xe(lrc->gt), &lrc->bo->vmap,
984                                  wa_bb_offset(lrc), buf,
985                                  (cmd - buf) * sizeof(*cmd));
986                 kfree(buf);
987         }
988
989         xe_lrc_write_ctx_reg(lrc, CTX_BB_PER_CTX_PTR, xe_bo_ggtt_addr(lrc->bo) +
990                              wa_bb_offset(lrc) + 1);
991
992         return 0;
993 }
994
995 #define PVC_CTX_ASID            (0x2e + 1)
996 #define PVC_CTX_ACC_CTR_THOLD   (0x2a + 1)
997
998 static int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe,
999                        struct xe_vm *vm, u32 ring_size, u16 msix_vec,
1000                        u32 init_flags)
1001 {
1002         struct xe_gt *gt = hwe->gt;
1003         struct xe_tile *tile = gt_to_tile(gt);
1004         struct xe_device *xe = gt_to_xe(gt);
1005         struct iosys_map map;
1006         void *init_data = NULL;
1007         u32 arb_enable;
1008         u32 lrc_size;
1009         u32 bo_flags;
1010         int err;
1011
1012         kref_init(&lrc->refcount);
1013         lrc->gt = gt;
1014         lrc->flags = 0;
1015         lrc_size = ring_size + xe_gt_lrc_size(gt, hwe->class);
1016         if (xe_gt_has_indirect_ring_state(gt))
1017                 lrc->flags |= XE_LRC_FLAG_INDIRECT_RING_STATE;
1018
1019         bo_flags = XE_BO_FLAG_VRAM_IF_DGFX(tile) | XE_BO_FLAG_GGTT |
1020                    XE_BO_FLAG_GGTT_INVALIDATE;
1021         if (vm && vm->xef) /* userspace */
1022                 bo_flags |= XE_BO_FLAG_PINNED_LATE_RESTORE;
1023
1024         /*
1025          * FIXME: Perma-pinning LRC as we don't yet support moving GGTT address
1026          * via VM bind calls.
1027          */
1028         lrc->bo = xe_bo_create_pin_map(xe, tile, NULL,
1029                                        lrc_size + LRC_WA_BB_SIZE,
1030                                        ttm_bo_type_kernel,
1031                                        bo_flags);
1032         if (IS_ERR(lrc->bo))
1033                 return PTR_ERR(lrc->bo);
1034
1035         lrc->size = lrc_size;
1036         lrc->ring.size = ring_size;
1037         lrc->ring.tail = 0;
1038
1039         xe_hw_fence_ctx_init(&lrc->fence_ctx, hwe->gt,
1040                              hwe->fence_irq, hwe->name);
1041
1042         if (!gt->default_lrc[hwe->class]) {
1043                 init_data = empty_lrc_data(hwe);
1044                 if (!init_data) {
1045                         err = -ENOMEM;
1046                         goto err_lrc_finish;
1047                 }
1048         }
1049
1050         /*
1051          * Init Per-Process of HW status Page, LRC / context state to known
1052          * values
1053          */
1054         map = __xe_lrc_pphwsp_map(lrc);
1055         if (!init_data) {
1056                 xe_map_memset(xe, &map, 0, 0, LRC_PPHWSP_SIZE); /* PPHWSP */
1057                 xe_map_memcpy_to(xe, &map, LRC_PPHWSP_SIZE,
1058                                  gt->default_lrc[hwe->class] + LRC_PPHWSP_SIZE,
1059                                  xe_gt_lrc_size(gt, hwe->class) - LRC_PPHWSP_SIZE);
1060         } else {
1061                 xe_map_memcpy_to(xe, &map, 0, init_data,
1062                                  xe_gt_lrc_size(gt, hwe->class));
1063                 kfree(init_data);
1064         }
1065
1066         if (vm) {
1067                 xe_lrc_set_ppgtt(lrc, vm);
1068
1069                 if (vm->xef)
1070                         xe_drm_client_add_bo(vm->xef->client, lrc->bo);
1071         }
1072
1073         if (xe_device_has_msix(xe)) {
1074                 xe_lrc_write_ctx_reg(lrc, CTX_INT_STATUS_REPORT_PTR,
1075                                      xe_memirq_status_ptr(&tile->memirq, hwe));
1076                 xe_lrc_write_ctx_reg(lrc, CTX_INT_SRC_REPORT_PTR,
1077                                      xe_memirq_source_ptr(&tile->memirq, hwe));
1078                 xe_lrc_write_ctx_reg(lrc, CTX_CS_INT_VEC_DATA, msix_vec << 16 | msix_vec);
1079         }
1080
1081         if (xe_gt_has_indirect_ring_state(gt)) {
1082                 xe_lrc_write_ctx_reg(lrc, CTX_INDIRECT_RING_STATE,
1083                                      __xe_lrc_indirect_ring_ggtt_addr(lrc));
1084
1085                 xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_START,
1086                                               __xe_lrc_ring_ggtt_addr(lrc));
1087                 xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_START_UDW, 0);
1088                 xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_HEAD, 0);
1089                 xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_TAIL, lrc->ring.tail);
1090                 xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_CTL,
1091                                               RING_CTL_SIZE(lrc->ring.size) | RING_VALID);
1092         } else {
1093                 xe_lrc_write_ctx_reg(lrc, CTX_RING_START, __xe_lrc_ring_ggtt_addr(lrc));
1094                 xe_lrc_write_ctx_reg(lrc, CTX_RING_HEAD, 0);
1095                 xe_lrc_write_ctx_reg(lrc, CTX_RING_TAIL, lrc->ring.tail);
1096                 xe_lrc_write_ctx_reg(lrc, CTX_RING_CTL,
1097                                      RING_CTL_SIZE(lrc->ring.size) | RING_VALID);
1098         }
1099
1100         if (init_flags & XE_LRC_CREATE_RUNALONE)
1101                 xe_lrc_write_ctx_reg(lrc, CTX_CONTEXT_CONTROL,
1102                                      xe_lrc_read_ctx_reg(lrc, CTX_CONTEXT_CONTROL) |
1103                                      _MASKED_BIT_ENABLE(CTX_CTRL_RUN_ALONE));
1104
1105         if (init_flags & XE_LRC_CREATE_PXP)
1106                 xe_lrc_write_ctx_reg(lrc, CTX_CONTEXT_CONTROL,
1107                                      xe_lrc_read_ctx_reg(lrc, CTX_CONTEXT_CONTROL) |
1108                                      _MASKED_BIT_ENABLE(CTX_CTRL_PXP_ENABLE));
1109
1110         lrc->ctx_timestamp = 0;
1111         xe_lrc_write_ctx_reg(lrc, CTX_TIMESTAMP, 0);
1112         if (lrc_to_xe(lrc)->info.has_64bit_timestamp)
1113                 xe_lrc_write_ctx_reg(lrc, CTX_TIMESTAMP_UDW, 0);
1114
1115         if (xe->info.has_asid && vm)
1116                 xe_lrc_write_ctx_reg(lrc, PVC_CTX_ASID, vm->usm.asid);
1117
1118         lrc->desc = LRC_VALID;
1119         lrc->desc |= FIELD_PREP(LRC_ADDRESSING_MODE, LRC_LEGACY_64B_CONTEXT);
1120         /* TODO: Priority */
1121
1122         /* While this appears to have something about privileged batches or
1123          * some such, it really just means PPGTT mode.
1124          */
1125         if (vm)
1126                 lrc->desc |= LRC_PRIVILEGE;
1127
1128         if (GRAPHICS_VERx100(xe) < 1250) {
1129                 lrc->desc |= FIELD_PREP(LRC_ENGINE_INSTANCE, hwe->instance);
1130                 lrc->desc |= FIELD_PREP(LRC_ENGINE_CLASS, hwe->class);
1131         }
1132
1133         arb_enable = MI_ARB_ON_OFF | MI_ARB_ENABLE;
1134         xe_lrc_write_ring(lrc, &arb_enable, sizeof(arb_enable));
1135
1136         map = __xe_lrc_seqno_map(lrc);
1137         xe_map_write32(lrc_to_xe(lrc), &map, lrc->fence_ctx.next_seqno - 1);
1138
1139         map = __xe_lrc_start_seqno_map(lrc);
1140         xe_map_write32(lrc_to_xe(lrc), &map, lrc->fence_ctx.next_seqno - 1);
1141
1142         err = xe_lrc_setup_utilization(lrc);
1143         if (err)
1144                 goto err_lrc_finish;
1145
1146         return 0;
1147
1148 err_lrc_finish:
1149         xe_lrc_finish(lrc);
1150         return err;
1151 }
1152
1153 /**
1154  * xe_lrc_create - Create a LRC
1155  * @hwe: Hardware Engine
1156  * @vm: The VM (address space)
1157  * @ring_size: LRC ring size
1158  * @msix_vec: MSI-X interrupt vector (for platforms that support it)
1159  * @flags: LRC initialization flags
1160  *
1161  * Allocate and initialize the Logical Ring Context (LRC).
1162  *
1163  * Return pointer to created LRC upon success and an error pointer
1164  * upon failure.
1165  */
1166 struct xe_lrc *xe_lrc_create(struct xe_hw_engine *hwe, struct xe_vm *vm,
1167                              u32 ring_size, u16 msix_vec, u32 flags)
1168 {
1169         struct xe_lrc *lrc;
1170         int err;
1171
1172         lrc = kzalloc(sizeof(*lrc), GFP_KERNEL);
1173         if (!lrc)
1174                 return ERR_PTR(-ENOMEM);
1175
1176         err = xe_lrc_init(lrc, hwe, vm, ring_size, msix_vec, flags);
1177         if (err) {
1178                 kfree(lrc);
1179                 return ERR_PTR(err);
1180         }
1181
1182         return lrc;
1183 }
1184
1185 /**
1186  * xe_lrc_destroy - Destroy the LRC
1187  * @ref: reference to LRC
1188  *
1189  * Called when ref == 0, release resources held by the Logical Ring Context
1190  * (LRC) and free the LRC memory.
1191  */
1192 void xe_lrc_destroy(struct kref *ref)
1193 {
1194         struct xe_lrc *lrc = container_of(ref, struct xe_lrc, refcount);
1195
1196         xe_lrc_finish(lrc);
1197         kfree(lrc);
1198 }
1199
1200 void xe_lrc_set_ring_tail(struct xe_lrc *lrc, u32 tail)
1201 {
1202         if (xe_lrc_has_indirect_ring_state(lrc))
1203                 xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_TAIL, tail);
1204         else
1205                 xe_lrc_write_ctx_reg(lrc, CTX_RING_TAIL, tail);
1206 }
1207
1208 u32 xe_lrc_ring_tail(struct xe_lrc *lrc)
1209 {
1210         if (xe_lrc_has_indirect_ring_state(lrc))
1211                 return xe_lrc_read_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_TAIL) & TAIL_ADDR;
1212         else
1213                 return xe_lrc_read_ctx_reg(lrc, CTX_RING_TAIL) & TAIL_ADDR;
1214 }
1215
1216 static u32 xe_lrc_ring_start(struct xe_lrc *lrc)
1217 {
1218         if (xe_lrc_has_indirect_ring_state(lrc))
1219                 return xe_lrc_read_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_START);
1220         else
1221                 return xe_lrc_read_ctx_reg(lrc, CTX_RING_START);
1222 }
1223
1224 void xe_lrc_set_ring_head(struct xe_lrc *lrc, u32 head)
1225 {
1226         if (xe_lrc_has_indirect_ring_state(lrc))
1227                 xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_HEAD, head);
1228         else
1229                 xe_lrc_write_ctx_reg(lrc, CTX_RING_HEAD, head);
1230 }
1231
1232 u32 xe_lrc_ring_head(struct xe_lrc *lrc)
1233 {
1234         if (xe_lrc_has_indirect_ring_state(lrc))
1235                 return xe_lrc_read_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_HEAD) & HEAD_ADDR;
1236         else
1237                 return xe_lrc_read_ctx_reg(lrc, CTX_RING_HEAD) & HEAD_ADDR;
1238 }
1239
1240 u32 xe_lrc_ring_space(struct xe_lrc *lrc)
1241 {
1242         const u32 head = xe_lrc_ring_head(lrc);
1243         const u32 tail = lrc->ring.tail;
1244         const u32 size = lrc->ring.size;
1245
1246         return ((head - tail - 1) & (size - 1)) + 1;
1247 }
1248
1249 static void __xe_lrc_write_ring(struct xe_lrc *lrc, struct iosys_map ring,
1250                                 const void *data, size_t size)
1251 {
1252         struct xe_device *xe = lrc_to_xe(lrc);
1253
1254         iosys_map_incr(&ring, lrc->ring.tail);
1255         xe_map_memcpy_to(xe, &ring, 0, data, size);
1256         lrc->ring.tail = (lrc->ring.tail + size) & (lrc->ring.size - 1);
1257 }
1258
1259 void xe_lrc_write_ring(struct xe_lrc *lrc, const void *data, size_t size)
1260 {
1261         struct xe_device *xe = lrc_to_xe(lrc);
1262         struct iosys_map ring;
1263         u32 rhs;
1264         size_t aligned_size;
1265
1266         xe_assert(xe, IS_ALIGNED(size, 4));
1267         aligned_size = ALIGN(size, 8);
1268
1269         ring = __xe_lrc_ring_map(lrc);
1270
1271         xe_assert(xe, lrc->ring.tail < lrc->ring.size);
1272         rhs = lrc->ring.size - lrc->ring.tail;
1273         if (size > rhs) {
1274                 __xe_lrc_write_ring(lrc, ring, data, rhs);
1275                 __xe_lrc_write_ring(lrc, ring, data + rhs, size - rhs);
1276         } else {
1277                 __xe_lrc_write_ring(lrc, ring, data, size);
1278         }
1279
1280         if (aligned_size > size) {
1281                 u32 noop = MI_NOOP;
1282
1283                 __xe_lrc_write_ring(lrc, ring, &noop, sizeof(noop));
1284         }
1285 }
1286
1287 u64 xe_lrc_descriptor(struct xe_lrc *lrc)
1288 {
1289         return lrc->desc | xe_lrc_ggtt_addr(lrc);
1290 }
1291
1292 u32 xe_lrc_seqno_ggtt_addr(struct xe_lrc *lrc)
1293 {
1294         return __xe_lrc_seqno_ggtt_addr(lrc);
1295 }
1296
/**
 * xe_lrc_alloc_seqno_fence() - Allocate an lrc seqno fence.
 *
 * Allocate, but do not initialize, an lrc seqno fence. This forwards to
 * xe_hw_fence_alloc().
 *
 * Return: Pointer to the allocated fence or
 * negative error pointer on error.
 */
struct dma_fence *xe_lrc_alloc_seqno_fence(void)
{
	struct dma_fence *fence = xe_hw_fence_alloc();

	return fence;
}
1309
/**
 * xe_lrc_free_seqno_fence() - Free an lrc seqno fence.
 * @fence: Pointer to the fence to free.
 *
 * Release an lrc seqno fence that was allocated but has not been
 * initialized yet. This forwards to xe_hw_fence_free().
 */
void xe_lrc_free_seqno_fence(struct dma_fence *fence)
{
	xe_hw_fence_free(fence);
}
1321
1322 /**
1323  * xe_lrc_init_seqno_fence() - Initialize an lrc seqno fence.
1324  * @lrc: Pointer to the lrc.
1325  * @fence: Pointer to the fence to initialize.
1326  *
1327  * Initializes a pre-allocated lrc seqno fence.
1328  * After initialization, the fence is subject to normal
1329  * dma-fence refcounting.
1330  */
1331 void xe_lrc_init_seqno_fence(struct xe_lrc *lrc, struct dma_fence *fence)
1332 {
1333         xe_hw_fence_init(fence, &lrc->fence_ctx, __xe_lrc_seqno_map(lrc));
1334 }
1335
1336 s32 xe_lrc_seqno(struct xe_lrc *lrc)
1337 {
1338         struct iosys_map map = __xe_lrc_seqno_map(lrc);
1339
1340         return xe_map_read32(lrc_to_xe(lrc), &map);
1341 }
1342
1343 s32 xe_lrc_start_seqno(struct xe_lrc *lrc)
1344 {
1345         struct iosys_map map = __xe_lrc_start_seqno_map(lrc);
1346
1347         return xe_map_read32(lrc_to_xe(lrc), &map);
1348 }
1349
1350 u32 xe_lrc_start_seqno_ggtt_addr(struct xe_lrc *lrc)
1351 {
1352         return __xe_lrc_start_seqno_ggtt_addr(lrc);
1353 }
1354
1355 u32 xe_lrc_parallel_ggtt_addr(struct xe_lrc *lrc)
1356 {
1357         return __xe_lrc_parallel_ggtt_addr(lrc);
1358 }
1359
1360 struct iosys_map xe_lrc_parallel_map(struct xe_lrc *lrc)
1361 {
1362         return __xe_lrc_parallel_map(lrc);
1363 }
1364
1365 /**
1366  * xe_lrc_engine_id() - Read engine id value
1367  * @lrc: Pointer to the lrc.
1368  *
1369  * Returns: context id value
1370  */
1371 static u32 xe_lrc_engine_id(struct xe_lrc *lrc)
1372 {
1373         struct xe_device *xe = lrc_to_xe(lrc);
1374         struct iosys_map map;
1375
1376         map = __xe_lrc_engine_id_map(lrc);
1377         return xe_map_read32(xe, &map);
1378 }
1379
1380 static int instr_dw(u32 cmd_header)
1381 {
1382         /* GFXPIPE "SINGLE_DW" opcodes are a single dword */
1383         if ((cmd_header & (XE_INSTR_CMD_TYPE | GFXPIPE_PIPELINE)) ==
1384             GFXPIPE_SINGLE_DW_CMD(0, 0))
1385                 return 1;
1386
1387         /* 3DSTATE_SO_DECL_LIST has a 9-bit dword length rather than 8 */
1388         if ((cmd_header & GFXPIPE_MATCH_MASK) == CMD_3DSTATE_SO_DECL_LIST)
1389                 return REG_FIELD_GET(CMD_3DSTATE_SO_DECL_LIST_DW_LEN, cmd_header) + 2;
1390
1391         /* Most instructions have the # of dwords (minus 2) in 7:0 */
1392         return REG_FIELD_GET(XE_INSTR_LEN_MASK, cmd_header) + 2;
1393 }
1394
1395 static int dump_mi_command(struct drm_printer *p,
1396                            struct xe_gt *gt,
1397                            u32 *dw,
1398                            int remaining_dw)
1399 {
1400         u32 inst_header = *dw;
1401         u32 numdw = instr_dw(inst_header);
1402         u32 opcode = REG_FIELD_GET(MI_OPCODE, inst_header);
1403         int num_noop;
1404
1405         /* First check for commands that don't have/use a '# DW' field */
1406         switch (inst_header & MI_OPCODE) {
1407         case MI_NOOP:
1408                 num_noop = 1;
1409                 while (num_noop < remaining_dw &&
1410                        (*(++dw) & REG_GENMASK(31, 23)) == MI_NOOP)
1411                         num_noop++;
1412                 drm_printf(p, "[%#010x] MI_NOOP (%d dwords)\n", inst_header, num_noop);
1413                 return num_noop;
1414
1415         case MI_TOPOLOGY_FILTER:
1416                 drm_printf(p, "[%#010x] MI_TOPOLOGY_FILTER\n", inst_header);
1417                 return 1;
1418
1419         case MI_BATCH_BUFFER_END:
1420                 drm_printf(p, "[%#010x] MI_BATCH_BUFFER_END\n", inst_header);
1421                 /* Return 'remaining_dw' to consume the rest of the LRC */
1422                 return remaining_dw;
1423         }
1424
1425         /*
1426          * Any remaining commands include a # of dwords.  We should make sure
1427          * it doesn't exceed the remaining size of the LRC.
1428          */
1429         if (xe_gt_WARN_ON(gt, numdw > remaining_dw))
1430                 numdw = remaining_dw;
1431
1432         switch (inst_header & MI_OPCODE) {
1433         case MI_LOAD_REGISTER_IMM:
1434                 drm_printf(p, "[%#010x] MI_LOAD_REGISTER_IMM: %d regs\n",
1435                            inst_header, (numdw - 1) / 2);
1436                 for (int i = 1; i < numdw; i += 2)
1437                         drm_printf(p, " - %#6x = %#010x\n", dw[i], dw[i + 1]);
1438                 return numdw;
1439
1440         case MI_LOAD_REGISTER_MEM & MI_OPCODE:
1441                 drm_printf(p, "[%#010x] MI_LOAD_REGISTER_MEM: %s%s\n",
1442                            inst_header,
1443                            dw[0] & MI_LRI_LRM_CS_MMIO ? "CS_MMIO " : "",
1444                            dw[0] & MI_LRM_USE_GGTT ? "USE_GGTT " : "");
1445                 if (numdw == 4)
1446                         drm_printf(p, " - %#6x = %#010llx\n",
1447                                    dw[1], ((u64)(dw[3]) << 32 | (u64)(dw[2])));
1448                 else
1449                         drm_printf(p, " - %*ph (%s)\n",
1450                                    (int)sizeof(u32) * (numdw - 1), dw + 1,
1451                                    numdw < 4 ? "truncated" : "malformed");
1452                 return numdw;
1453
1454         case MI_FORCE_WAKEUP:
1455                 drm_printf(p, "[%#010x] MI_FORCE_WAKEUP\n", inst_header);
1456                 return numdw;
1457
1458         default:
1459                 drm_printf(p, "[%#010x] unknown MI opcode %#x, likely %d dwords\n",
1460                            inst_header, opcode, numdw);
1461                 return numdw;
1462         }
1463 }
1464
1465 static int dump_gfxpipe_command(struct drm_printer *p,
1466                                 struct xe_gt *gt,
1467                                 u32 *dw,
1468                                 int remaining_dw)
1469 {
1470         u32 numdw = instr_dw(*dw);
1471         u32 pipeline = REG_FIELD_GET(GFXPIPE_PIPELINE, *dw);
1472         u32 opcode = REG_FIELD_GET(GFXPIPE_OPCODE, *dw);
1473         u32 subopcode = REG_FIELD_GET(GFXPIPE_SUBOPCODE, *dw);
1474
1475         /*
1476          * Make sure we haven't mis-parsed a number of dwords that exceeds the
1477          * remaining size of the LRC.
1478          */
1479         if (xe_gt_WARN_ON(gt, numdw > remaining_dw))
1480                 numdw = remaining_dw;
1481
1482         switch (*dw & GFXPIPE_MATCH_MASK) {
1483 #define MATCH(cmd) \
1484         case cmd: \
1485                 drm_printf(p, "[%#010x] " #cmd " (%d dwords)\n", *dw, numdw); \
1486                 return numdw
1487 #define MATCH3D(cmd) \
1488         case CMD_##cmd: \
1489                 drm_printf(p, "[%#010x] " #cmd " (%d dwords)\n", *dw, numdw); \
1490                 return numdw
1491
1492         MATCH(STATE_BASE_ADDRESS);
1493         MATCH(STATE_SIP);
1494         MATCH(GPGPU_CSR_BASE_ADDRESS);
1495         MATCH(STATE_COMPUTE_MODE);
1496         MATCH3D(3DSTATE_BTD);
1497         MATCH(STATE_SYSTEM_MEM_FENCE_ADDRESS);
1498         MATCH(STATE_CONTEXT_DATA_BASE_ADDRESS);
1499
1500         MATCH3D(3DSTATE_VF_STATISTICS);
1501
1502         MATCH(PIPELINE_SELECT);
1503
1504         MATCH3D(3DSTATE_DRAWING_RECTANGLE_FAST);
1505         MATCH3D(3DSTATE_CLEAR_PARAMS);
1506         MATCH3D(3DSTATE_DEPTH_BUFFER);
1507         MATCH3D(3DSTATE_STENCIL_BUFFER);
1508         MATCH3D(3DSTATE_HIER_DEPTH_BUFFER);
1509         MATCH3D(3DSTATE_VERTEX_BUFFERS);
1510         MATCH3D(3DSTATE_VERTEX_ELEMENTS);
1511         MATCH3D(3DSTATE_INDEX_BUFFER);
1512         MATCH3D(3DSTATE_VF);
1513         MATCH3D(3DSTATE_MULTISAMPLE);
1514         MATCH3D(3DSTATE_CC_STATE_POINTERS);
1515         MATCH3D(3DSTATE_SCISSOR_STATE_POINTERS);
1516         MATCH3D(3DSTATE_VS);
1517         MATCH3D(3DSTATE_GS);
1518         MATCH3D(3DSTATE_CLIP);
1519         MATCH3D(3DSTATE_SF);
1520         MATCH3D(3DSTATE_WM);
1521         MATCH3D(3DSTATE_CONSTANT_VS);
1522         MATCH3D(3DSTATE_CONSTANT_GS);
1523         MATCH3D(3DSTATE_CONSTANT_PS);
1524         MATCH3D(3DSTATE_SAMPLE_MASK);
1525         MATCH3D(3DSTATE_CONSTANT_HS);
1526         MATCH3D(3DSTATE_CONSTANT_DS);
1527         MATCH3D(3DSTATE_HS);
1528         MATCH3D(3DSTATE_TE);
1529         MATCH3D(3DSTATE_DS);
1530         MATCH3D(3DSTATE_STREAMOUT);
1531         MATCH3D(3DSTATE_SBE);
1532         MATCH3D(3DSTATE_PS);
1533         MATCH3D(3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP);
1534         MATCH3D(3DSTATE_CPS_POINTERS);
1535         MATCH3D(3DSTATE_VIEWPORT_STATE_POINTERS_CC);
1536         MATCH3D(3DSTATE_BLEND_STATE_POINTERS);
1537         MATCH3D(3DSTATE_BINDING_TABLE_POINTERS_VS);
1538         MATCH3D(3DSTATE_BINDING_TABLE_POINTERS_HS);
1539         MATCH3D(3DSTATE_BINDING_TABLE_POINTERS_DS);
1540         MATCH3D(3DSTATE_BINDING_TABLE_POINTERS_GS);
1541         MATCH3D(3DSTATE_BINDING_TABLE_POINTERS_PS);
1542         MATCH3D(3DSTATE_SAMPLER_STATE_POINTERS_VS);
1543         MATCH3D(3DSTATE_SAMPLER_STATE_POINTERS_HS);
1544         MATCH3D(3DSTATE_SAMPLER_STATE_POINTERS_DS);
1545         MATCH3D(3DSTATE_SAMPLER_STATE_POINTERS_GS);
1546         MATCH3D(3DSTATE_SAMPLER_STATE_POINTERS_PS);
1547         MATCH3D(3DSTATE_VF_INSTANCING);
1548         MATCH3D(3DSTATE_VF_SGVS);
1549         MATCH3D(3DSTATE_VF_TOPOLOGY);
1550         MATCH3D(3DSTATE_WM_CHROMAKEY);
1551         MATCH3D(3DSTATE_PS_BLEND);
1552         MATCH3D(3DSTATE_WM_DEPTH_STENCIL);
1553         MATCH3D(3DSTATE_PS_EXTRA);
1554         MATCH3D(3DSTATE_RASTER);
1555         MATCH3D(3DSTATE_SBE_SWIZ);
1556         MATCH3D(3DSTATE_WM_HZ_OP);
1557         MATCH3D(3DSTATE_VF_COMPONENT_PACKING);
1558         MATCH3D(3DSTATE_VF_SGVS_2);
1559         MATCH3D(3DSTATE_VFG);
1560         MATCH3D(3DSTATE_URB_ALLOC_VS);
1561         MATCH3D(3DSTATE_URB_ALLOC_HS);
1562         MATCH3D(3DSTATE_URB_ALLOC_DS);
1563         MATCH3D(3DSTATE_URB_ALLOC_GS);
1564         MATCH3D(3DSTATE_SO_BUFFER_INDEX_0);
1565         MATCH3D(3DSTATE_SO_BUFFER_INDEX_1);
1566         MATCH3D(3DSTATE_SO_BUFFER_INDEX_2);
1567         MATCH3D(3DSTATE_SO_BUFFER_INDEX_3);
1568         MATCH3D(3DSTATE_PRIMITIVE_REPLICATION);
1569         MATCH3D(3DSTATE_TBIMR_TILE_PASS_INFO);
1570         MATCH3D(3DSTATE_AMFS);
1571         MATCH3D(3DSTATE_DEPTH_BOUNDS);
1572         MATCH3D(3DSTATE_AMFS_TEXTURE_POINTERS);
1573         MATCH3D(3DSTATE_CONSTANT_TS_POINTER);
1574         MATCH3D(3DSTATE_MESH_CONTROL);
1575         MATCH3D(3DSTATE_MESH_DISTRIB);
1576         MATCH3D(3DSTATE_TASK_REDISTRIB);
1577         MATCH3D(3DSTATE_MESH_SHADER);
1578         MATCH3D(3DSTATE_MESH_SHADER_DATA);
1579         MATCH3D(3DSTATE_TASK_CONTROL);
1580         MATCH3D(3DSTATE_TASK_SHADER);
1581         MATCH3D(3DSTATE_TASK_SHADER_DATA);
1582         MATCH3D(3DSTATE_URB_ALLOC_MESH);
1583         MATCH3D(3DSTATE_URB_ALLOC_TASK);
1584         MATCH3D(3DSTATE_CLIP_MESH);
1585         MATCH3D(3DSTATE_SBE_MESH);
1586         MATCH3D(3DSTATE_CPSIZE_CONTROL_BUFFER);
1587         MATCH3D(3DSTATE_COARSE_PIXEL);
1588
1589         MATCH3D(3DSTATE_DRAWING_RECTANGLE);
1590         MATCH3D(3DSTATE_CHROMA_KEY);
1591         MATCH3D(3DSTATE_POLY_STIPPLE_OFFSET);
1592         MATCH3D(3DSTATE_POLY_STIPPLE_PATTERN);
1593         MATCH3D(3DSTATE_LINE_STIPPLE);
1594         MATCH3D(3DSTATE_AA_LINE_PARAMETERS);
1595         MATCH3D(3DSTATE_MONOFILTER_SIZE);
1596         MATCH3D(3DSTATE_PUSH_CONSTANT_ALLOC_VS);
1597         MATCH3D(3DSTATE_PUSH_CONSTANT_ALLOC_HS);
1598         MATCH3D(3DSTATE_PUSH_CONSTANT_ALLOC_DS);
1599         MATCH3D(3DSTATE_PUSH_CONSTANT_ALLOC_GS);
1600         MATCH3D(3DSTATE_PUSH_CONSTANT_ALLOC_PS);
1601         MATCH3D(3DSTATE_SO_DECL_LIST);
1602         MATCH3D(3DSTATE_SO_BUFFER);
1603         MATCH3D(3DSTATE_BINDING_TABLE_POOL_ALLOC);
1604         MATCH3D(3DSTATE_SAMPLE_PATTERN);
1605         MATCH3D(3DSTATE_3D_MODE);
1606         MATCH3D(3DSTATE_SUBSLICE_HASH_TABLE);
1607         MATCH3D(3DSTATE_SLICE_TABLE_STATE_POINTERS);
1608         MATCH3D(3DSTATE_PTBR_TILE_PASS_INFO);
1609
1610         default:
1611                 drm_printf(p, "[%#010x] unknown GFXPIPE command (pipeline=%#x, opcode=%#x, subopcode=%#x), likely %d dwords\n",
1612                            *dw, pipeline, opcode, subopcode, numdw);
1613                 return numdw;
1614         }
1615 }
1616
1617 static int dump_gfx_state_command(struct drm_printer *p,
1618                                   struct xe_gt *gt,
1619                                   u32 *dw,
1620                                   int remaining_dw)
1621 {
1622         u32 numdw = instr_dw(*dw);
1623         u32 opcode = REG_FIELD_GET(GFX_STATE_OPCODE, *dw);
1624
1625         /*
1626          * Make sure we haven't mis-parsed a number of dwords that exceeds the
1627          * remaining size of the LRC.
1628          */
1629         if (xe_gt_WARN_ON(gt, numdw > remaining_dw))
1630                 numdw = remaining_dw;
1631
1632         switch (*dw & (XE_INSTR_GFX_STATE | GFX_STATE_OPCODE)) {
1633         MATCH(STATE_WRITE_INLINE);
1634
1635         default:
1636                 drm_printf(p, "[%#010x] unknown GFX_STATE command (opcode=%#x), likely %d dwords\n",
1637                            *dw, opcode, numdw);
1638                 return numdw;
1639         }
1640 }
1641
1642 void xe_lrc_dump_default(struct drm_printer *p,
1643                          struct xe_gt *gt,
1644                          enum xe_engine_class hwe_class)
1645 {
1646         u32 *dw;
1647         int remaining_dw, num_dw;
1648
1649         if (!gt->default_lrc[hwe_class]) {
1650                 drm_printf(p, "No default LRC for class %d\n", hwe_class);
1651                 return;
1652         }
1653
1654         /*
1655          * Skip the beginning of the LRC since it contains the per-process
1656          * hardware status page.
1657          */
1658         dw = gt->default_lrc[hwe_class] + LRC_PPHWSP_SIZE;
1659         remaining_dw = (xe_gt_lrc_size(gt, hwe_class) - LRC_PPHWSP_SIZE) / 4;
1660
1661         while (remaining_dw > 0) {
1662                 if ((*dw & XE_INSTR_CMD_TYPE) == XE_INSTR_MI) {
1663                         num_dw = dump_mi_command(p, gt, dw, remaining_dw);
1664                 } else if ((*dw & XE_INSTR_CMD_TYPE) == XE_INSTR_GFXPIPE) {
1665                         num_dw = dump_gfxpipe_command(p, gt, dw, remaining_dw);
1666                 } else if ((*dw & XE_INSTR_CMD_TYPE) == XE_INSTR_GFX_STATE) {
1667                         num_dw = dump_gfx_state_command(p, gt, dw, remaining_dw);
1668                 } else {
1669                         num_dw = min(instr_dw(*dw), remaining_dw);
1670                         drm_printf(p, "[%#10x] Unknown instruction of type %#x, likely %d dwords\n",
1671                                    *dw, REG_FIELD_GET(XE_INSTR_CMD_TYPE, *dw),
1672                                    num_dw);
1673                 }
1674
1675                 dw += num_dw;
1676                 remaining_dw -= num_dw;
1677         }
1678 }
1679
1680 struct instr_state {
1681         u32 instr;
1682         u16 num_dw;
1683 };
1684
1685 static const struct instr_state xe_hpg_svg_state[] = {
1686         { .instr = CMD_3DSTATE_CONSTANT_VS, .num_dw = 11 },
1687         { .instr = CMD_3DSTATE_CONSTANT_HS, .num_dw = 11 },
1688         { .instr = CMD_3DSTATE_CONSTANT_DS, .num_dw = 11 },
1689         { .instr = CMD_3DSTATE_CONSTANT_GS, .num_dw = 11 },
1690         { .instr = CMD_3DSTATE_VERTEX_ELEMENTS, .num_dw = 69 },
1691         { .instr = CMD_3DSTATE_VF_COMPONENT_PACKING, .num_dw = 5 },
1692         { .instr = CMD_3DSTATE_VF_SGVS, .num_dw = 2 },
1693         { .instr = CMD_3DSTATE_VF_SGVS_2, .num_dw = 3 },
1694         { .instr = CMD_3DSTATE_VS, .num_dw = 9 },
1695         { .instr = CMD_3DSTATE_BINDING_TABLE_POINTERS_VS, .num_dw = 2 },
1696         { .instr = CMD_3DSTATE_SAMPLER_STATE_POINTERS_VS, .num_dw = 2 },
1697         { .instr = CMD_3DSTATE_URB_ALLOC_VS, .num_dw = 3 },
1698         { .instr = CMD_3DSTATE_STREAMOUT, .num_dw = 5 },
1699         { .instr = CMD_3DSTATE_SO_BUFFER_INDEX_0, .num_dw = 8 },
1700         { .instr = CMD_3DSTATE_SO_BUFFER_INDEX_1, .num_dw = 8 },
1701         { .instr = CMD_3DSTATE_SO_BUFFER_INDEX_2, .num_dw = 8 },
1702         { .instr = CMD_3DSTATE_SO_BUFFER_INDEX_3, .num_dw = 8 },
1703         { .instr = CMD_3DSTATE_CLIP, .num_dw = 4 },
1704         { .instr = CMD_3DSTATE_PRIMITIVE_REPLICATION, .num_dw = 6 },
1705         { .instr = CMD_3DSTATE_CLIP_MESH, .num_dw = 2 },
1706         { .instr = CMD_3DSTATE_SF, .num_dw = 4 },
1707         { .instr = CMD_3DSTATE_SCISSOR_STATE_POINTERS, .num_dw = 2 },
1708         { .instr = CMD_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP, .num_dw = 2 },
1709         { .instr = CMD_3DSTATE_RASTER, .num_dw = 5 },
1710         { .instr = CMD_3DSTATE_TBIMR_TILE_PASS_INFO, .num_dw = 4 },
1711         { .instr = CMD_3DSTATE_WM_HZ_OP, .num_dw = 6 },
1712         { .instr = CMD_3DSTATE_MULTISAMPLE, .num_dw = 2 },
1713         { .instr = CMD_3DSTATE_HS, .num_dw = 9 },
1714         { .instr = CMD_3DSTATE_BINDING_TABLE_POINTERS_HS, .num_dw = 2 },
1715         { .instr = CMD_3DSTATE_SAMPLER_STATE_POINTERS_HS, .num_dw = 2 },
1716         { .instr = CMD_3DSTATE_URB_ALLOC_HS, .num_dw = 3 },
1717         { .instr = CMD_3DSTATE_TASK_CONTROL, .num_dw = 3 },
1718         { .instr = CMD_3DSTATE_TASK_SHADER, .num_dw = 7 },
1719         { .instr = CMD_3DSTATE_TASK_SHADER_DATA, .num_dw = 10 },
1720         { .instr = CMD_3DSTATE_URB_ALLOC_TASK, .num_dw = 3 },
1721         { .instr = CMD_3DSTATE_TE, .num_dw = 5 },
1722         { .instr = CMD_3DSTATE_TASK_REDISTRIB, .num_dw = 2 },
1723         { .instr = CMD_3DSTATE_DS, .num_dw = 11 },
1724         { .instr = CMD_3DSTATE_BINDING_TABLE_POINTERS_DS, .num_dw = 2 },
1725         { .instr = CMD_3DSTATE_SAMPLER_STATE_POINTERS_DS, .num_dw = 2 },
1726         { .instr = CMD_3DSTATE_URB_ALLOC_DS, .num_dw = 3 },
1727         { .instr = CMD_3DSTATE_GS, .num_dw = 10 },
1728         { .instr = CMD_3DSTATE_BINDING_TABLE_POINTERS_GS, .num_dw = 2 },
1729         { .instr = CMD_3DSTATE_SAMPLER_STATE_POINTERS_GS, .num_dw = 2 },
1730         { .instr = CMD_3DSTATE_URB_ALLOC_GS, .num_dw = 3 },
1731         { .instr = CMD_3DSTATE_MESH_CONTROL, .num_dw = 3 },
1732         { .instr = CMD_3DSTATE_MESH_SHADER_DATA, .num_dw = 10 },
1733         { .instr = CMD_3DSTATE_URB_ALLOC_MESH, .num_dw = 3 },
1734         { .instr = CMD_3DSTATE_MESH_SHADER, .num_dw = 8 },
1735         { .instr = CMD_3DSTATE_DRAWING_RECTANGLE, .num_dw = 4 },
1736 };
1737
1738 void xe_lrc_emit_hwe_state_instructions(struct xe_exec_queue *q, struct xe_bb *bb)
1739 {
1740         struct xe_gt *gt = q->hwe->gt;
1741         struct xe_device *xe = gt_to_xe(gt);
1742         const struct instr_state *state_table = NULL;
1743         int state_table_size = 0;
1744
1745         /*
1746          * Wa_14019789679
1747          *
1748          * If the driver doesn't explicitly emit the SVG instructions while
1749          * setting up the default LRC, the context switch will write 0's
1750          * (noops) into the LRC memory rather than the expected instruction
1751          * headers.  Application contexts start out as a copy of the default
1752          * LRC, and if they also do not emit specific settings for some SVG
1753          * state, then on context restore they'll unintentionally inherit
1754          * whatever state setting the previous context had programmed into the
1755          * hardware (i.e., the lack of a 3DSTATE_* instruction in the LRC will
1756          * prevent the hardware from resetting that state back to any specific
1757          * value).
1758          *
1759          * The official workaround only requires emitting 3DSTATE_MESH_CONTROL
1760          * since that's a specific state setting that can easily cause GPU
1761          * hangs if unintentionally inherited.  However to be safe we'll
1762          * continue to emit all of the SVG state since it's best not to leak
1763          * any of the state between contexts, even if that leakage is harmless.
1764          */
1765         if (XE_WA(gt, 14019789679) && q->hwe->class == XE_ENGINE_CLASS_RENDER) {
1766                 state_table = xe_hpg_svg_state;
1767                 state_table_size = ARRAY_SIZE(xe_hpg_svg_state);
1768         }
1769
1770         if (!state_table) {
1771                 xe_gt_dbg(gt, "No non-register state to emit on graphics ver %d.%02d\n",
1772                           GRAPHICS_VER(xe), GRAPHICS_VERx100(xe) % 100);
1773                 return;
1774         }
1775
1776         for (int i = 0; i < state_table_size; i++) {
1777                 u32 instr = state_table[i].instr;
1778                 u16 num_dw = state_table[i].num_dw;
1779                 bool is_single_dw = ((instr & GFXPIPE_PIPELINE) == PIPELINE_SINGLE_DW);
1780
1781                 xe_gt_assert(gt, (instr & XE_INSTR_CMD_TYPE) == XE_INSTR_GFXPIPE);
1782                 xe_gt_assert(gt, num_dw != 0);
1783                 xe_gt_assert(gt, is_single_dw ^ (num_dw > 1));
1784
1785                 /*
1786                  * Xe2's SVG context is the same as the one on DG2 / MTL
1787                  * except that 3DSTATE_DRAWING_RECTANGLE (non-pipelined) has
1788                  * been replaced by 3DSTATE_DRAWING_RECTANGLE_FAST (pipelined).
1789                  * Just make the replacement here rather than defining a
1790                  * whole separate table for the single trivial change.
1791                  */
1792                 if (GRAPHICS_VER(xe) >= 20 &&
1793                     instr == CMD_3DSTATE_DRAWING_RECTANGLE)
1794                         instr = CMD_3DSTATE_DRAWING_RECTANGLE_FAST;
1795
1796                 bb->cs[bb->len] = instr;
1797                 if (!is_single_dw)
1798                         bb->cs[bb->len] |= (num_dw - 2);
1799
1800                 bb->len += num_dw;
1801         }
1802 }
1803
1804 struct xe_lrc_snapshot *xe_lrc_snapshot_capture(struct xe_lrc *lrc)
1805 {
1806         struct xe_lrc_snapshot *snapshot = kmalloc(sizeof(*snapshot), GFP_NOWAIT);
1807
1808         if (!snapshot)
1809                 return NULL;
1810
1811         snapshot->context_desc = xe_lrc_ggtt_addr(lrc);
1812         snapshot->ring_addr = __xe_lrc_ring_ggtt_addr(lrc);
1813         snapshot->indirect_context_desc = xe_lrc_indirect_ring_ggtt_addr(lrc);
1814         snapshot->head = xe_lrc_ring_head(lrc);
1815         snapshot->tail.internal = lrc->ring.tail;
1816         snapshot->tail.memory = xe_lrc_ring_tail(lrc);
1817         snapshot->start = xe_lrc_ring_start(lrc);
1818         snapshot->start_seqno = xe_lrc_start_seqno(lrc);
1819         snapshot->seqno = xe_lrc_seqno(lrc);
1820         snapshot->lrc_bo = xe_bo_get(lrc->bo);
1821         snapshot->lrc_offset = xe_lrc_pphwsp_offset(lrc);
1822         snapshot->lrc_size = lrc->bo->size - snapshot->lrc_offset -
1823                 LRC_WA_BB_SIZE;
1824         snapshot->lrc_snapshot = NULL;
1825         snapshot->ctx_timestamp = lower_32_bits(xe_lrc_ctx_timestamp(lrc));
1826         snapshot->ctx_job_timestamp = xe_lrc_ctx_job_timestamp(lrc);
1827         return snapshot;
1828 }
1829
1830 void xe_lrc_snapshot_capture_delayed(struct xe_lrc_snapshot *snapshot)
1831 {
1832         struct xe_bo *bo;
1833         struct iosys_map src;
1834
1835         if (!snapshot)
1836                 return;
1837
1838         bo = snapshot->lrc_bo;
1839         snapshot->lrc_bo = NULL;
1840
1841         snapshot->lrc_snapshot = kvmalloc(snapshot->lrc_size, GFP_KERNEL);
1842         if (!snapshot->lrc_snapshot)
1843                 goto put_bo;
1844
1845         xe_bo_lock(bo, false);
1846         if (!ttm_bo_vmap(&bo->ttm, &src)) {
1847                 xe_map_memcpy_from(xe_bo_device(bo),
1848                                    snapshot->lrc_snapshot, &src, snapshot->lrc_offset,
1849                                    snapshot->lrc_size);
1850                 ttm_bo_vunmap(&bo->ttm, &src);
1851         } else {
1852                 kvfree(snapshot->lrc_snapshot);
1853                 snapshot->lrc_snapshot = NULL;
1854         }
1855         xe_bo_unlock(bo);
1856 put_bo:
1857         xe_bo_put(bo);
1858 }
1859
1860 void xe_lrc_snapshot_print(struct xe_lrc_snapshot *snapshot, struct drm_printer *p)
1861 {
1862         unsigned long i;
1863
1864         if (!snapshot)
1865                 return;
1866
1867         drm_printf(p, "\tHW Context Desc: 0x%08x\n", snapshot->context_desc);
1868         drm_printf(p, "\tHW Ring address: 0x%08x\n",
1869                    snapshot->ring_addr);
1870         drm_printf(p, "\tHW Indirect Ring State: 0x%08x\n",
1871                    snapshot->indirect_context_desc);
1872         drm_printf(p, "\tLRC Head: (memory) %u\n", snapshot->head);
1873         drm_printf(p, "\tLRC Tail: (internal) %u, (memory) %u\n",
1874                    snapshot->tail.internal, snapshot->tail.memory);
1875         drm_printf(p, "\tRing start: (memory) 0x%08x\n", snapshot->start);
1876         drm_printf(p, "\tStart seqno: (memory) %d\n", snapshot->start_seqno);
1877         drm_printf(p, "\tSeqno: (memory) %d\n", snapshot->seqno);
1878         drm_printf(p, "\tTimestamp: 0x%08x\n", snapshot->ctx_timestamp);
1879         drm_printf(p, "\tJob Timestamp: 0x%08x\n", snapshot->ctx_job_timestamp);
1880
1881         if (!snapshot->lrc_snapshot)
1882                 return;
1883
1884         drm_printf(p, "\t[HWSP].length: 0x%x\n", LRC_PPHWSP_SIZE);
1885         drm_puts(p, "\t[HWSP].data: ");
1886         for (i = 0; i < LRC_PPHWSP_SIZE; i += sizeof(u32)) {
1887                 u32 *val = snapshot->lrc_snapshot + i;
1888                 char dumped[ASCII85_BUFSZ];
1889
1890                 drm_puts(p, ascii85_encode(*val, dumped));
1891         }
1892
1893         drm_printf(p, "\n\t[HWCTX].length: 0x%lx\n", snapshot->lrc_size - LRC_PPHWSP_SIZE);
1894         drm_puts(p, "\t[HWCTX].data: ");
1895         for (; i < snapshot->lrc_size; i += sizeof(u32)) {
1896                 u32 *val = snapshot->lrc_snapshot + i;
1897                 char dumped[ASCII85_BUFSZ];
1898
1899                 drm_puts(p, ascii85_encode(*val, dumped));
1900         }
1901         drm_puts(p, "\n");
1902 }
1903
1904 void xe_lrc_snapshot_free(struct xe_lrc_snapshot *snapshot)
1905 {
1906         if (!snapshot)
1907                 return;
1908
1909         kvfree(snapshot->lrc_snapshot);
1910         if (snapshot->lrc_bo)
1911                 xe_bo_put(snapshot->lrc_bo);
1912
1913         kfree(snapshot);
1914 }
1915
1916 static int get_ctx_timestamp(struct xe_lrc *lrc, u32 engine_id, u64 *reg_ctx_ts)
1917 {
1918         u16 class = REG_FIELD_GET(ENGINE_CLASS_ID, engine_id);
1919         u16 instance = REG_FIELD_GET(ENGINE_INSTANCE_ID, engine_id);
1920         struct xe_hw_engine *hwe;
1921         u64 val;
1922
1923         hwe = xe_gt_hw_engine(lrc->gt, class, instance, false);
1924         if (xe_gt_WARN_ONCE(lrc->gt, !hwe || xe_hw_engine_is_reserved(hwe),
1925                             "Unexpected engine class:instance %d:%d for context utilization\n",
1926                             class, instance))
1927                 return -1;
1928
1929         if (lrc_to_xe(lrc)->info.has_64bit_timestamp)
1930                 val = xe_mmio_read64_2x32(&hwe->gt->mmio,
1931                                           RING_CTX_TIMESTAMP(hwe->mmio_base));
1932         else
1933                 val = xe_mmio_read32(&hwe->gt->mmio,
1934                                      RING_CTX_TIMESTAMP(hwe->mmio_base));
1935
1936         *reg_ctx_ts = val;
1937
1938         return 0;
1939 }
1940
1941 /**
1942  * xe_lrc_update_timestamp() - Update ctx timestamp
1943  * @lrc: Pointer to the lrc.
1944  * @old_ts: Old timestamp value
1945  *
1946  * Populate @old_ts current saved ctx timestamp, read new ctx timestamp and
1947  * update saved value. With support for active contexts, the calculation may be
1948  * slightly racy, so follow a read-again logic to ensure that the context is
1949  * still active before returning the right timestamp.
1950  *
1951  * Returns: New ctx timestamp value
1952  */
1953 u64 xe_lrc_update_timestamp(struct xe_lrc *lrc, u64 *old_ts)
1954 {
1955         u64 lrc_ts, reg_ts;
1956         u32 engine_id;
1957
1958         *old_ts = lrc->ctx_timestamp;
1959
1960         lrc_ts = xe_lrc_ctx_timestamp(lrc);
1961         /* CTX_TIMESTAMP mmio read is invalid on VF, so return the LRC value */
1962         if (IS_SRIOV_VF(lrc_to_xe(lrc))) {
1963                 lrc->ctx_timestamp = lrc_ts;
1964                 goto done;
1965         }
1966
1967         if (lrc_ts == CONTEXT_ACTIVE) {
1968                 engine_id = xe_lrc_engine_id(lrc);
1969                 if (!get_ctx_timestamp(lrc, engine_id, &reg_ts))
1970                         lrc->ctx_timestamp = reg_ts;
1971
1972                 /* read lrc again to ensure context is still active */
1973                 lrc_ts = xe_lrc_ctx_timestamp(lrc);
1974         }
1975
1976         /*
1977          * If context switched out, just use the lrc_ts. Note that this needs to
1978          * be a separate if condition.
1979          */
1980         if (lrc_ts != CONTEXT_ACTIVE)
1981                 lrc->ctx_timestamp = lrc_ts;
1982
1983 done:
1984         trace_xe_lrc_update_timestamp(lrc, *old_ts);
1985
1986         return lrc->ctx_timestamp;
1987 }
1988
1989 /**
1990  * xe_lrc_ring_is_idle() - LRC is idle
1991  * @lrc: Pointer to the lrc.
1992  *
1993  * Compare LRC ring head and tail to determine if idle.
1994  *
1995  * Return: True is ring is idle, False otherwise
1996  */
1997 bool xe_lrc_ring_is_idle(struct xe_lrc *lrc)
1998 {
1999         return xe_lrc_ring_head(lrc) == xe_lrc_ring_tail(lrc);
2000 }