Merge tag 'pinctrl-v6.9-2' of git://git.kernel.org/pub/scm/linux/kernel/git/linusw...
[linux-block.git] / drivers / gpu / drm / xe / xe_sched_job.c
CommitLineData
dd08ebf6
MB
1// SPDX-License-Identifier: MIT
2/*
3 * Copyright © 2021 Intel Corporation
4 */
5
6#include "xe_sched_job.h"
7
8#include <linux/dma-fence-array.h>
9#include <linux/slab.h>
10
765b65e5 11#include "xe_device.h"
c22a4ed0 12#include "xe_exec_queue.h"
dd08ebf6
MB
13#include "xe_gt.h"
14#include "xe_hw_engine_types.h"
15#include "xe_hw_fence.h"
16#include "xe_lrc.h"
17#include "xe_macros.h"
18#include "xe_trace.h"
19#include "xe_vm.h"
20
21static struct kmem_cache *xe_sched_job_slab;
22static struct kmem_cache *xe_sched_job_parallel_slab;
23
24int __init xe_sched_job_module_init(void)
25{
26 xe_sched_job_slab =
27 kmem_cache_create("xe_sched_job",
28 sizeof(struct xe_sched_job) +
29 sizeof(u64), 0,
30 SLAB_HWCACHE_ALIGN, NULL);
31 if (!xe_sched_job_slab)
32 return -ENOMEM;
33
34 xe_sched_job_parallel_slab =
35 kmem_cache_create("xe_sched_job_parallel",
36 sizeof(struct xe_sched_job) +
37 sizeof(u64) *
4cd6d492 38 XE_HW_ENGINE_MAX_INSTANCE, 0,
dd08ebf6
MB
39 SLAB_HWCACHE_ALIGN, NULL);
40 if (!xe_sched_job_parallel_slab) {
41 kmem_cache_destroy(xe_sched_job_slab);
42 return -ENOMEM;
43 }
44
45 return 0;
46}
47
48void xe_sched_job_module_exit(void)
49{
50 kmem_cache_destroy(xe_sched_job_slab);
51 kmem_cache_destroy(xe_sched_job_parallel_slab);
52}
53
54static struct xe_sched_job *job_alloc(bool parallel)
55{
56 return kmem_cache_zalloc(parallel ? xe_sched_job_parallel_slab :
57 xe_sched_job_slab, GFP_KERNEL);
58}
59
9b9529ce 60bool xe_sched_job_is_migration(struct xe_exec_queue *q)
dd08ebf6 61{
9e952635 62 return q->vm && (q->vm->flags & XE_VM_FLAG_MIGRATION);
dd08ebf6
MB
63}
64
65static void job_free(struct xe_sched_job *job)
66{
9b9529ce
FD
67 struct xe_exec_queue *q = job->q;
68 bool is_migration = xe_sched_job_is_migration(q);
dd08ebf6 69
9b9529ce 70 kmem_cache_free(xe_exec_queue_is_parallel(job->q) || is_migration ?
dd08ebf6
MB
71 xe_sched_job_parallel_slab : xe_sched_job_slab, job);
72}
73
765b65e5
MB
74static struct xe_device *job_to_xe(struct xe_sched_job *job)
75{
9b9529ce 76 return gt_to_xe(job->q->gt);
765b65e5
MB
77}
78
9b9529ce 79struct xe_sched_job *xe_sched_job_create(struct xe_exec_queue *q,
dd08ebf6
MB
80 u64 *batch_addr)
81{
82 struct xe_sched_job *job;
83 struct dma_fence **fences;
9b9529ce 84 bool is_migration = xe_sched_job_is_migration(q);
dd08ebf6
MB
85 int err;
86 int i, j;
87 u32 width;
88
1c66c0f3
DCS
89 /* only a kernel context can submit a vm-less job */
90 XE_WARN_ON(!q->vm && !(q->flags & EXEC_QUEUE_FLAG_KERNEL));
91
dd08ebf6 92 /* Migration and kernel engines have their own locking */
9e952635 93 if (!(q->flags & (EXEC_QUEUE_FLAG_KERNEL | EXEC_QUEUE_FLAG_VM))) {
9b9529ce 94 lockdep_assert_held(&q->vm->lock);
fdb6a053 95 if (!xe_vm_in_lr_mode(q->vm))
9b9529ce 96 xe_vm_assert_held(q->vm);
dd08ebf6
MB
97 }
98
9b9529ce 99 job = job_alloc(xe_exec_queue_is_parallel(q) || is_migration);
dd08ebf6
MB
100 if (!job)
101 return ERR_PTR(-ENOMEM);
102
9b9529ce 103 job->q = q;
dd08ebf6 104 kref_init(&job->refcount);
9b9529ce 105 xe_exec_queue_get(job->q);
dd08ebf6 106
9b9529ce 107 err = drm_sched_job_init(&job->drm, q->entity, 1, NULL);
dd08ebf6
MB
108 if (err)
109 goto err_free;
110
9b9529ce
FD
111 if (!xe_exec_queue_is_parallel(q)) {
112 job->fence = xe_lrc_create_seqno_fence(q->lrc);
dd08ebf6
MB
113 if (IS_ERR(job->fence)) {
114 err = PTR_ERR(job->fence);
115 goto err_sched_job;
116 }
117 } else {
118 struct dma_fence_array *cf;
119
9b9529ce 120 fences = kmalloc_array(q->width, sizeof(*fences), GFP_KERNEL);
dd08ebf6
MB
121 if (!fences) {
122 err = -ENOMEM;
123 goto err_sched_job;
124 }
125
9b9529ce
FD
126 for (j = 0; j < q->width; ++j) {
127 fences[j] = xe_lrc_create_seqno_fence(q->lrc + j);
dd08ebf6
MB
128 if (IS_ERR(fences[j])) {
129 err = PTR_ERR(fences[j]);
130 goto err_fences;
131 }
132 }
133
9b9529ce
FD
134 cf = dma_fence_array_create(q->width, fences,
135 q->parallel.composite_fence_ctx,
136 q->parallel.composite_fence_seqno++,
dd08ebf6
MB
137 false);
138 if (!cf) {
9b9529ce 139 --q->parallel.composite_fence_seqno;
dd08ebf6
MB
140 err = -ENOMEM;
141 goto err_fences;
142 }
143
144 /* Sanity check */
9b9529ce 145 for (j = 0; j < q->width; ++j)
c73acc1e 146 xe_assert(job_to_xe(job), cf->base.seqno == fences[j]->seqno);
dd08ebf6
MB
147
148 job->fence = &cf->base;
149 }
150
9b9529ce 151 width = q->width;
dd08ebf6
MB
152 if (is_migration)
153 width = 2;
154
155 for (i = 0; i < width; ++i)
156 job->batch_addr[i] = batch_addr[i];
157
765b65e5 158 /* All other jobs require a VM to be open which has a ref */
9b9529ce 159 if (unlikely(q->flags & EXEC_QUEUE_FLAG_KERNEL))
765b65e5
MB
160 xe_device_mem_access_get(job_to_xe(job));
161 xe_device_assert_mem_access(job_to_xe(job));
162
dd08ebf6
MB
163 trace_xe_sched_job_create(job);
164 return job;
165
166err_fences:
167 for (j = j - 1; j >= 0; --j) {
9b9529ce 168 --q->lrc[j].fence_ctx.next_seqno;
dd08ebf6
MB
169 dma_fence_put(fences[j]);
170 }
171 kfree(fences);
172err_sched_job:
173 drm_sched_job_cleanup(&job->drm);
174err_free:
9b9529ce 175 xe_exec_queue_put(q);
dd08ebf6
MB
176 job_free(job);
177 return ERR_PTR(err);
178}
179
180/**
181 * xe_sched_job_destroy - Destroy XE schedule job
182 * @ref: reference to XE schedule job
183 *
184 * Called when ref == 0, drop a reference to job's xe_engine + fence, cleanup
185 * base DRM schedule job, and free memory for XE schedule job.
186 */
187void xe_sched_job_destroy(struct kref *ref)
188{
189 struct xe_sched_job *job =
190 container_of(ref, struct xe_sched_job, refcount);
191
9b9529ce 192 if (unlikely(job->q->flags & EXEC_QUEUE_FLAG_KERNEL))
765b65e5 193 xe_device_mem_access_put(job_to_xe(job));
9b9529ce 194 xe_exec_queue_put(job->q);
dd08ebf6
MB
195 dma_fence_put(job->fence);
196 drm_sched_job_cleanup(&job->drm);
197 job_free(job);
198}
199
200void xe_sched_job_set_error(struct xe_sched_job *job, int error)
201{
202 if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &job->fence->flags))
203 return;
204
205 dma_fence_set_error(job->fence, error);
206
207 if (dma_fence_is_array(job->fence)) {
208 struct dma_fence_array *array =
209 to_dma_fence_array(job->fence);
210 struct dma_fence **child = array->fences;
211 unsigned int nchild = array->num_fences;
212
213 do {
214 struct dma_fence *current_fence = *child++;
215
216 if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
217 &current_fence->flags))
218 continue;
219 dma_fence_set_error(current_fence, error);
220 } while (--nchild);
221 }
222
223 trace_xe_sched_job_set_error(job);
224
225 dma_fence_enable_sw_signaling(job->fence);
9b9529ce 226 xe_hw_fence_irq_run(job->q->fence_irq);
dd08ebf6
MB
227}
228
229bool xe_sched_job_started(struct xe_sched_job *job)
230{
9b9529ce 231 struct xe_lrc *lrc = job->q->lrc;
dd08ebf6 232
1a9d163c
MA
233 return !__dma_fence_is_later(xe_sched_job_seqno(job),
234 xe_lrc_start_seqno(lrc),
235 job->fence->ops);
dd08ebf6
MB
236}
237
238bool xe_sched_job_completed(struct xe_sched_job *job)
239{
9b9529ce 240 struct xe_lrc *lrc = job->q->lrc;
dd08ebf6
MB
241
242 /*
243 * Can safely check just LRC[0] seqno as that is last seqno written when
244 * parallel handshake is done.
245 */
246
1a9d163c
MA
247 return !__dma_fence_is_later(xe_sched_job_seqno(job), xe_lrc_seqno(lrc),
248 job->fence->ops);
dd08ebf6
MB
249}
250
251void xe_sched_job_arm(struct xe_sched_job *job)
252{
3c88b8f4
TH
253 struct xe_exec_queue *q = job->q;
254 struct xe_vm *vm = q->vm;
255
256 if (vm && !xe_sched_job_is_migration(q) && !xe_vm_in_lr_mode(vm) &&
257 (vm->batch_invalidate_tlb || vm->tlb_flush_seqno != q->tlb_flush_seqno)) {
258 xe_vm_assert_held(vm);
259 q->tlb_flush_seqno = vm->tlb_flush_seqno;
260 job->ring_ops_flush_tlb = true;
261 }
262
dd08ebf6
MB
263 drm_sched_job_arm(&job->drm);
264}
265
266void xe_sched_job_push(struct xe_sched_job *job)
267{
268 xe_sched_job_get(job);
269 trace_xe_sched_job_exec(job);
270 drm_sched_entity_push_job(&job->drm);
271 xe_sched_job_put(job);
272}
eb9702ad
MB
273
274/**
275 * xe_sched_job_last_fence_add_dep - Add last fence dependency to job
276 * @job:job to add the last fence dependency to
277 * @vm: virtual memory job belongs to
278 *
279 * Returns:
280 * 0 on success, or an error on failing to expand the array.
281 */
282int xe_sched_job_last_fence_add_dep(struct xe_sched_job *job, struct xe_vm *vm)
283{
284 struct dma_fence *fence;
285
286 fence = xe_exec_queue_last_fence_get(job->q, vm);
eb9702ad
MB
287
288 return drm_sched_job_add_dependency(&job->drm, fence);
289}
be7d51c5
JRS
290
291struct xe_sched_job_snapshot *
292xe_sched_job_snapshot_capture(struct xe_sched_job *job)
293{
294 struct xe_exec_queue *q = job->q;
295 struct xe_device *xe = q->gt->tile->xe;
296 struct xe_sched_job_snapshot *snapshot;
297 size_t len = sizeof(*snapshot) + (sizeof(u64) * q->width);
298 u16 i;
299
300 snapshot = kzalloc(len, GFP_ATOMIC);
301 if (!snapshot)
302 return NULL;
303
304 snapshot->batch_addr_len = q->width;
305 for (i = 0; i < q->width; i++)
306 snapshot->batch_addr[i] = xe_device_uncanonicalize_addr(xe, job->batch_addr[i]);
307
308 return snapshot;
309}
310
/**
 * xe_sched_job_snapshot_free - Free a job snapshot
 * @snapshot: snapshot to free; passed straight to kfree()
 */
void xe_sched_job_snapshot_free(struct xe_sched_job_snapshot *snapshot)
{
	kfree(snapshot);
}
315
316void
317xe_sched_job_snapshot_print(struct xe_sched_job_snapshot *snapshot,
318 struct drm_printer *p)
319{
320 u16 i;
321
322 if (!snapshot)
323 return;
324
325 for (i = 0; i < snapshot->batch_addr_len; i++)
326 drm_printf(p, "batch_addr[%u]: 0x%016llx\n", i, snapshot->batch_addr[i]);
327}