215606b5fae0edefea0a03085081983f89df0ef3
[linux-block.git] / drivers / gpu / drm / xe / xe_ring_ops.c
1 // SPDX-License-Identifier: MIT
2 /*
3  * Copyright © 2022 Intel Corporation
4  */
5
6 #include "xe_ring_ops.h"
7
8 #include "regs/xe_gpu_commands.h"
9 #include "regs/xe_gt_regs.h"
10 #include "regs/xe_lrc_layout.h"
11 #include "regs/xe_regs.h"
12 #include "xe_engine_types.h"
13 #include "xe_gt.h"
14 #include "xe_lrc.h"
15 #include "xe_macros.h"
16 #include "xe_sched_job.h"
17 #include "xe_vm_types.h"
18 #include "xe_vm.h"
19
20 /*
21  * 3D-related flags that can't be set on _engines_ that lack access to the 3D
22  * pipeline (i.e., CCS engines).
23  */
/*
 * 3D-related flags that can't be set on _engines_ that lack access to the 3D
 * pipeline (i.e., CCS engines).
 */
#define PIPE_CONTROL_3D_ENGINE_FLAGS (\
		PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH | \
		PIPE_CONTROL_DEPTH_CACHE_FLUSH | \
		PIPE_CONTROL_TILE_CACHE_FLUSH | \
		PIPE_CONTROL_DEPTH_STALL | \
		PIPE_CONTROL_STALL_AT_SCOREBOARD | \
		PIPE_CONTROL_PSD_SYNC | \
		PIPE_CONTROL_AMFS_FLUSH | \
		PIPE_CONTROL_VF_CACHE_INVALIDATE | \
		PIPE_CONTROL_GLOBAL_SNAPSHOT_RESET)

/*
 * 3D-related flags that can't be set on _platforms_ that lack a 3D pipeline
 * entirely (superset of the per-engine flags above).
 */
#define PIPE_CONTROL_3D_ARCH_FLAGS ( \
		PIPE_CONTROL_3D_ENGINE_FLAGS | \
		PIPE_CONTROL_INDIRECT_STATE_DISABLE | \
		PIPE_CONTROL_FLUSH_ENABLE | \
		PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE | \
		PIPE_CONTROL_DC_FLUSH_ENABLE)
42
43 static u32 preparser_disable(bool state)
44 {
45         return MI_ARB_CHECK | BIT(8) | state;
46 }
47
48 static int emit_aux_table_inv(struct xe_gt *gt, struct xe_reg reg,
49                               u32 *dw, int i)
50 {
51         dw[i++] = MI_LOAD_REGISTER_IMM(1) | MI_LRI_MMIO_REMAP_EN;
52         dw[i++] = reg.addr + gt->mmio.adj_offset;
53         dw[i++] = AUX_INV;
54         dw[i++] = MI_NOOP;
55
56         return i;
57 }
58
59 static int emit_user_interrupt(u32 *dw, int i)
60 {
61         dw[i++] = MI_USER_INTERRUPT;
62         dw[i++] = MI_ARB_ON_OFF | MI_ARB_ENABLE;
63         dw[i++] = MI_ARB_CHECK;
64
65         return i;
66 }
67
68 static int emit_store_imm_ggtt(u32 addr, u32 value, u32 *dw, int i)
69 {
70         dw[i++] = MI_STORE_DATA_IMM | BIT(22) /* GGTT */ | 2;
71         dw[i++] = addr;
72         dw[i++] = 0;
73         dw[i++] = value;
74
75         return i;
76 }
77
78 static int emit_flush_imm_ggtt(u32 addr, u32 value, bool invalidate_tlb,
79                                u32 *dw, int i)
80 {
81         dw[i++] = (MI_FLUSH_DW + 1) | MI_FLUSH_DW_OP_STOREDW |
82                 (invalidate_tlb ? MI_INVALIDATE_TLB : 0);
83         dw[i++] = addr | MI_FLUSH_DW_USE_GTT;
84         dw[i++] = 0;
85         dw[i++] = value;
86
87         return i;
88 }
89
90 static int emit_bb_start(u64 batch_addr, u32 ppgtt_flag, u32 *dw, int i)
91 {
92         dw[i++] = MI_BATCH_BUFFER_START | ppgtt_flag;
93         dw[i++] = lower_32_bits(batch_addr);
94         dw[i++] = upper_32_bits(batch_addr);
95
96         return i;
97 }
98
99 static int emit_flush_invalidate(u32 flag, u32 *dw, int i)
100 {
101         dw[i] = MI_FLUSH_DW + 1;
102         dw[i] |= flag;
103         dw[i++] |= MI_INVALIDATE_TLB | MI_FLUSH_DW_OP_STOREDW |
104                 MI_FLUSH_DW_STORE_INDEX;
105
106         dw[i++] = LRC_PPHWSP_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT;
107         dw[i++] = 0;
108         dw[i++] = ~0U;
109
110         return i;
111 }
112
113 static int emit_pipe_invalidate(u32 mask_flags, bool invalidate_tlb, u32 *dw,
114                                 int i)
115 {
116         u32 flags = PIPE_CONTROL_CS_STALL |
117                 PIPE_CONTROL_COMMAND_CACHE_INVALIDATE |
118                 PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE |
119                 PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE |
120                 PIPE_CONTROL_VF_CACHE_INVALIDATE |
121                 PIPE_CONTROL_CONST_CACHE_INVALIDATE |
122                 PIPE_CONTROL_STATE_CACHE_INVALIDATE |
123                 PIPE_CONTROL_QW_WRITE |
124                 PIPE_CONTROL_STORE_DATA_INDEX;
125
126         if (invalidate_tlb)
127                 flags |= PIPE_CONTROL_TLB_INVALIDATE;
128
129         flags &= ~mask_flags;
130
131         dw[i++] = GFX_OP_PIPE_CONTROL(6);
132         dw[i++] = flags;
133         dw[i++] = LRC_PPHWSP_SCRATCH_ADDR;
134         dw[i++] = 0;
135         dw[i++] = 0;
136         dw[i++] = 0;
137
138         return i;
139 }
140
141 #define MI_STORE_QWORD_IMM_GEN8_POSTED (MI_INSTR(0x20, 3) | (1 << 21))
142
143 static int emit_store_imm_ppgtt_posted(u64 addr, u64 value,
144                                        u32 *dw, int i)
145 {
146         dw[i++] = MI_STORE_QWORD_IMM_GEN8_POSTED;
147         dw[i++] = lower_32_bits(addr);
148         dw[i++] = upper_32_bits(addr);
149         dw[i++] = lower_32_bits(value);
150         dw[i++] = upper_32_bits(value);
151
152         return i;
153 }
154
155 static int emit_pipe_imm_ggtt(u32 addr, u32 value, bool stall_only, u32 *dw,
156                               int i)
157 {
158         dw[i++] = GFX_OP_PIPE_CONTROL(6);
159         dw[i++] = (stall_only ? PIPE_CONTROL_CS_STALL :
160                    PIPE_CONTROL_FLUSH_ENABLE | PIPE_CONTROL_CS_STALL) |
161                 PIPE_CONTROL_GLOBAL_GTT_IVB | PIPE_CONTROL_QW_WRITE;
162         dw[i++] = addr;
163         dw[i++] = 0;
164         dw[i++] = value;
165         dw[i++] = 0; /* We're thrashing one extra dword. */
166
167         return i;
168 }
169
170 static u32 get_ppgtt_flag(struct xe_sched_job *job)
171 {
172         return !(job->engine->flags & ENGINE_FLAG_WA) ? BIT(8) : 0;
173 }
174
/*
 * Emit the ring commands for one batch on a copy engine: write the
 * job's start seqno (folding in a TLB-invalidating flush when the VM
 * requires it), jump to the batch, post the optional user fence, then
 * flush while writing the completion seqno and raise a user interrupt.
 */
static void __emit_job_gen12_copy(struct xe_sched_job *job, struct xe_lrc *lrc,
                                  u64 batch_addr, u32 seqno)
{
        u32 dw[MAX_JOB_SIZE_DW], i = 0;
        u32 ppgtt_flag = get_ppgtt_flag(job);
        struct xe_vm *vm = job->engine->vm;

        if (vm->batch_invalidate_tlb) {
                /* Bracket the invalidation with pre-parser off/on so no
                 * commands are prefetched across the TLB invalidate. */
                dw[i++] = preparser_disable(true);
                i = emit_flush_imm_ggtt(xe_lrc_start_seqno_ggtt_addr(lrc),
                                        seqno, true, dw, i);
                dw[i++] = preparser_disable(false);
        } else {
                /* No invalidation needed: plain store of the start seqno. */
                i = emit_store_imm_ggtt(xe_lrc_start_seqno_ggtt_addr(lrc),
                                        seqno, dw, i);
        }

        i = emit_bb_start(batch_addr, ppgtt_flag, dw, i);

        /* Post the user fence value to the PPGTT address, if requested. */
        if (job->user_fence.used)
                i = emit_store_imm_ppgtt_posted(job->user_fence.addr,
                                                job->user_fence.value,
                                                dw, i);

        /* Completion seqno write doubles as the post-batch flush. */
        i = emit_flush_imm_ggtt(xe_lrc_seqno_ggtt_addr(lrc), seqno, false, dw, i);

        i = emit_user_interrupt(dw, i);

        XE_BUG_ON(i > MAX_JOB_SIZE_DW);

        xe_lrc_write_ring(lrc, dw, i * sizeof(*dw));
}
207
208 static bool has_aux_ccs(struct xe_device *xe)
209 {
210         /*
211          * PVC is a special case that has no compression of either type
212          * (FlatCCS or AuxCCS).
213          */
214         if (xe->info.platform == XE_PVC)
215                 return false;
216
217         return !xe->info.has_flat_ccs;
218 }
219
/*
 * Emit the ring commands for one batch on a video decode/enhance engine.
 * With the pre-parser disabled: invalidate the AuxCCS table when the
 * platform uses AuxCCS (hsdes: 1809175790) and, if the VM requires it,
 * write the start seqno via a TLB-invalidating flush. Then jump to the
 * batch, post the optional user fence, flush while writing the
 * completion seqno, and raise a user interrupt.
 */
static void __emit_job_gen12_video(struct xe_sched_job *job, struct xe_lrc *lrc,
                                   u64 batch_addr, u32 seqno)
{
        u32 dw[MAX_JOB_SIZE_DW], i = 0;
        u32 ppgtt_flag = get_ppgtt_flag(job);
        struct xe_gt *gt = job->engine->gt;
        struct xe_device *xe = gt_to_xe(gt);
        bool decode = job->engine->class == XE_ENGINE_CLASS_VIDEO_DECODE;
        struct xe_vm *vm = job->engine->vm;

        dw[i++] = preparser_disable(true);

        /* hsdes: 1809175790 — AuxCCS table invalidation, per engine class */
        if (has_aux_ccs(xe)) {
                if (decode)
                        i = emit_aux_table_inv(gt, VD0_AUX_INV, dw, i);
                else
                        i = emit_aux_table_inv(gt, VE0_AUX_INV, dw, i);
        }

        if (vm->batch_invalidate_tlb)
                i = emit_flush_imm_ggtt(xe_lrc_start_seqno_ggtt_addr(lrc),
                                        seqno, true, dw, i);

        dw[i++] = preparser_disable(false);

        /* If the TLB flush above didn't write it, store the start seqno now. */
        if (!vm->batch_invalidate_tlb)
                i = emit_store_imm_ggtt(xe_lrc_start_seqno_ggtt_addr(lrc),
                                        seqno, dw, i);

        i = emit_bb_start(batch_addr, ppgtt_flag, dw, i);

        /* Post the user fence value to the PPGTT address, if requested. */
        if (job->user_fence.used)
                i = emit_store_imm_ppgtt_posted(job->user_fence.addr,
                                                job->user_fence.value,
                                                dw, i);

        /* Completion seqno write doubles as the post-batch flush. */
        i = emit_flush_imm_ggtt(xe_lrc_seqno_ggtt_addr(lrc), seqno, false, dw, i);

        i = emit_user_interrupt(dw, i);

        XE_BUG_ON(i > MAX_JOB_SIZE_DW);

        xe_lrc_write_ring(lrc, dw, i * sizeof(*dw));
}
265
/*
 * Emit the ring commands for one batch on a render or compute engine.
 * With the pre-parser disabled: issue a PIPE_CONTROL cache invalidation
 * (masking out 3D flags the engine/platform can't take) and an AuxCCS
 * table invalidation when applicable (hsdes: 1809175790). Then write the
 * start seqno, jump to the batch, post the optional user fence, write
 * the completion seqno via PIPE_CONTROL, and raise a user interrupt.
 */
static void __emit_job_gen12_render_compute(struct xe_sched_job *job,
                                            struct xe_lrc *lrc,
                                            u64 batch_addr, u32 seqno)
{
        u32 dw[MAX_JOB_SIZE_DW], i = 0;
        u32 ppgtt_flag = get_ppgtt_flag(job);
        struct xe_gt *gt = job->engine->gt;
        struct xe_device *xe = gt_to_xe(gt);
        bool lacks_render = !(gt->info.engine_mask & XE_HW_ENGINE_RCS_MASK);
        struct xe_vm *vm = job->engine->vm;
        u32 mask_flags = 0;

        dw[i++] = preparser_disable(true);
        /* Mask out PIPE_CONTROL bits unsupported by the platform/engine. */
        if (lacks_render)
                mask_flags = PIPE_CONTROL_3D_ARCH_FLAGS;
        else if (job->engine->class == XE_ENGINE_CLASS_COMPUTE)
                mask_flags = PIPE_CONTROL_3D_ENGINE_FLAGS;

        /* See __xe_pt_bind_vma() for a discussion on TLB invalidations. */
        i = emit_pipe_invalidate(mask_flags, vm->batch_invalidate_tlb, dw, i);

        /* hsdes: 1809175790 — AuxCCS table invalidation for CCS */
        if (has_aux_ccs(xe))
                i = emit_aux_table_inv(gt, CCS_AUX_INV, dw, i);

        dw[i++] = preparser_disable(false);

        i = emit_store_imm_ggtt(xe_lrc_start_seqno_ggtt_addr(lrc),
                                seqno, dw, i);

        i = emit_bb_start(batch_addr, ppgtt_flag, dw, i);

        /* Post the user fence value to the PPGTT address, if requested. */
        if (job->user_fence.used)
                i = emit_store_imm_ppgtt_posted(job->user_fence.addr,
                                                job->user_fence.value,
                                                dw, i);

        /* Stall-only write when there's no render engine to flush. */
        i = emit_pipe_imm_ggtt(xe_lrc_seqno_ggtt_addr(lrc), seqno, lacks_render, dw, i);

        i = emit_user_interrupt(dw, i);

        XE_BUG_ON(i > MAX_JOB_SIZE_DW);

        xe_lrc_write_ring(lrc, dw, i * sizeof(*dw));
}
311
/*
 * Emit a migration job: two chained batches (typically PTE update then
 * copy/clear) with a TLB-invalidating flush between them so the second
 * batch observes the first batch's page-table writes. Both batches run
 * from GGTT (BIT(8) unset would be PPGTT; migration uses GGTT BBs).
 */
static void emit_migration_job_gen12(struct xe_sched_job *job,
                                     struct xe_lrc *lrc, u32 seqno)
{
        u32 dw[MAX_JOB_SIZE_DW], i = 0;

        i = emit_store_imm_ggtt(xe_lrc_start_seqno_ggtt_addr(lrc),
                                seqno, dw, i);

        i = emit_bb_start(job->batch_addr[0], BIT(8), dw, i);

        /* XXX: Do we need this? Leaving for now. */
        dw[i++] = preparser_disable(true);
        i = emit_flush_invalidate(0, dw, i);
        dw[i++] = preparser_disable(false);

        i = emit_bb_start(job->batch_addr[1], BIT(8), dw, i);

        /* Completion: TLB-invalidating flush that stores the seqno. */
        dw[i++] = (MI_FLUSH_DW | MI_INVALIDATE_TLB | job->migrate_flush_flags |
                   MI_FLUSH_DW_OP_STOREDW) + 1;
        dw[i++] = xe_lrc_seqno_ggtt_addr(lrc) | MI_FLUSH_DW_USE_GTT;
        dw[i++] = 0;
        dw[i++] = seqno; /* value */

        i = emit_user_interrupt(dw, i);

        XE_BUG_ON(i > MAX_JOB_SIZE_DW);

        xe_lrc_write_ring(lrc, dw, i * sizeof(*dw));
}
341
342 static void emit_job_gen12_copy(struct xe_sched_job *job)
343 {
344         int i;
345
346         if (xe_sched_job_is_migration(job->engine)) {
347                 emit_migration_job_gen12(job, job->engine->lrc,
348                                          xe_sched_job_seqno(job));
349                 return;
350         }
351
352         for (i = 0; i < job->engine->width; ++i)
353                 __emit_job_gen12_copy(job, job->engine->lrc + i,
354                                       job->batch_addr[i],
355                                       xe_sched_job_seqno(job));
356 }
357
358 static void emit_job_gen12_video(struct xe_sched_job *job)
359 {
360         int i;
361
362         /* FIXME: Not doing parallel handshake for now */
363         for (i = 0; i < job->engine->width; ++i)
364                 __emit_job_gen12_video(job, job->engine->lrc + i,
365                                        job->batch_addr[i],
366                                        xe_sched_job_seqno(job));
367 }
368
369 static void emit_job_gen12_render_compute(struct xe_sched_job *job)
370 {
371         int i;
372
373         for (i = 0; i < job->engine->width; ++i)
374                 __emit_job_gen12_render_compute(job, job->engine->lrc + i,
375                                                 job->batch_addr[i],
376                                                 xe_sched_job_seqno(job));
377 }
378
/* Ring ops for copy (and migration) engines. */
static const struct xe_ring_ops ring_ops_gen12_copy = {
        .emit_job = emit_job_gen12_copy,
};

/* Ring ops for video decode/enhance engines. */
static const struct xe_ring_ops ring_ops_gen12_video = {
        .emit_job = emit_job_gen12_video,
};

/* Ring ops for render and compute engines. */
static const struct xe_ring_ops ring_ops_gen12_render_compute = {
        .emit_job = emit_job_gen12_render_compute,
};
390
391 const struct xe_ring_ops *
392 xe_ring_ops_get(struct xe_gt *gt, enum xe_engine_class class)
393 {
394         switch (class) {
395         case XE_ENGINE_CLASS_COPY:
396                 return &ring_ops_gen12_copy;
397         case XE_ENGINE_CLASS_VIDEO_DECODE:
398         case XE_ENGINE_CLASS_VIDEO_ENHANCE:
399                 return &ring_ops_gen12_video;
400         case XE_ENGINE_CLASS_RENDER:
401         case XE_ENGINE_CLASS_COMPUTE:
402                 return &ring_ops_gen12_render_compute;
403         default:
404                 return NULL;
405         }
406 }