drm/scheduler: rework job destruction
drivers/gpu/drm/v3d/v3d_sched.c
// SPDX-License-Identifier: GPL-2.0+
/* Copyright (C) 2018 Broadcom */

/**
 * DOC: Broadcom V3D scheduling
 *
 * The shared DRM GPU scheduler is used to coordinate submitting jobs
 * to the hardware.  Each DRM fd (roughly a client process) gets its
 * own scheduler entity, which will process jobs in order.  The GPU
 * scheduler will round-robin between clients to submit the next job.
 *
 * For simplicity, and in order to keep latency low for interactive
 * jobs when bulk background jobs are queued up, we submit a new job
 * to the HW only when it has completed the last one, instead of
 * filling up the CT[01]Q FIFOs with jobs.  Similarly, we use
 * v3d_job_dependency() to manage the dependency between bin and
 * render, instead of having the clients submit jobs using the HW's
 * semaphores to interlock between them.
 */

#include <linux/kthread.h>

#include "v3d_drv.h"
#include "v3d_regs.h"
#include "v3d_trace.h"

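/* Helpers for getting from the scheduler's embedded base job back to
 * the v3d-specific job structs, using container_of() on the base
 * member.
 */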
static struct v3d_job *
to_v3d_job(struct drm_sched_job *sched_job)
{
        return container_of(sched_job, struct v3d_job, base);
}

static struct v3d_bin_job *
to_bin_job(struct drm_sched_job *sched_job)
{
        return container_of(sched_job, struct v3d_bin_job, base.base);
}

static struct v3d_render_job *
to_render_job(struct drm_sched_job *sched_job)
{
        return container_of(sched_job, struct v3d_render_job, base.base);
}

static struct v3d_tfu_job *
to_tfu_job(struct drm_sched_job *sched_job)
{
        return container_of(sched_job, struct v3d_tfu_job, base.base);
}

static struct v3d_csd_job *
to_csd_job(struct drm_sched_job *sched_job)
{
        return container_of(sched_job, struct v3d_csd_job, base.base);
}

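/* Called by the scheduler when a job is finished with: release the
 * scheduler's bookkeeping for the job and drop our reference on it.
 */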
static void
v3d_job_free(struct drm_sched_job *sched_job)
{
        struct v3d_job *job = to_v3d_job(sched_job);

        drm_sched_job_cleanup(sched_job);
        v3d_job_put(job);
}

/**
 * v3d_job_dependency() - Return the next fence the job depends on
 * @sched_job: the job being asked about
 * @s_entity: the scheduler entity the job was queued on
 *
 * Returns the fences that the job depends on, one by one.
 *
 * If placed in the scheduler's .dependency method, the corresponding
 * .run_job won't be called until all of them have been signaled.
 */
static struct dma_fence *
v3d_job_dependency(struct drm_sched_job *sched_job,
                   struct drm_sched_entity *s_entity)
{
        struct v3d_job *job = to_v3d_job(sched_job);

        /* XXX: Wait on a fence for switching the GMP if necessary,
         * and then do so.
         */

        if (!xa_empty(&job->deps))
                return xa_erase(&job->deps, job->last_dep++);

        return NULL;
}

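/* Kick off a binning job: point the CT0Q control-list queue at the
 * job's start/end addresses (writing the end address is what starts
 * the hardware) and return a fence that the IRQ handler will signal
 * when binning completes.
 */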
static struct dma_fence *v3d_bin_job_run(struct drm_sched_job *sched_job)
{
        struct v3d_bin_job *job = to_bin_job(sched_job);
        struct v3d_dev *v3d = job->base.v3d;
        struct drm_device *dev = &v3d->drm;
        struct dma_fence *fence;
        unsigned long irqflags;

        if (unlikely(job->base.base.s_fence->finished.error))
                return NULL;

        /* Lock required around bin_job update vs
         * v3d_overflow_mem_work().
         */
        spin_lock_irqsave(&v3d->job_lock, irqflags);
        v3d->bin_job = job;
        /* Clear out the overflow allocation, so we don't
         * reuse the overflow attached to a previous job.
         */
        V3D_CORE_WRITE(0, V3D_PTB_BPOS, 0);
        spin_unlock_irqrestore(&v3d->job_lock, irqflags);

        v3d_invalidate_caches(v3d);

        fence = v3d_fence_create(v3d, V3D_BIN);
        if (IS_ERR(fence))
                return NULL;

        if (job->base.irq_fence)
                dma_fence_put(job->base.irq_fence);
        job->base.irq_fence = dma_fence_get(fence);

        trace_v3d_submit_cl(dev, false, to_v3d_fence(fence)->seqno,
                            job->start, job->end);

        /* Set the current and end address of the control list.
         * Writing the end register is what starts the job.
         */
        if (job->qma) {
                V3D_CORE_WRITE(0, V3D_CLE_CT0QMA, job->qma);
                V3D_CORE_WRITE(0, V3D_CLE_CT0QMS, job->qms);
        }
        if (job->qts) {
                V3D_CORE_WRITE(0, V3D_CLE_CT0QTS,
                               V3D_CLE_CT0QTS_ENABLE |
                               job->qts);
        }
        V3D_CORE_WRITE(0, V3D_CLE_CT0QBA, job->start);
        V3D_CORE_WRITE(0, V3D_CLE_CT0QEA, job->end);

        return fence;
}

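/* Kick off a render job on the CT1Q control-list queue.  The flow
 * mirrors the binner: record the job, create the fence that the IRQ
 * handler will signal, then write the start/end addresses to start
 * the hardware.
 */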
static struct dma_fence *v3d_render_job_run(struct drm_sched_job *sched_job)
{
        struct v3d_render_job *job = to_render_job(sched_job);
        struct v3d_dev *v3d = job->base.v3d;
        struct drm_device *dev = &v3d->drm;
        struct dma_fence *fence;

        if (unlikely(job->base.base.s_fence->finished.error))
                return NULL;

        v3d->render_job = job;

        /* Can we avoid this flush?  We need to be careful of
         * scheduling, though -- imagine job0 rendering to texture and
         * job1 reading, and them being executed as bin0, bin1,
         * render0, render1, so that render1's flush at bin time
         * wasn't enough.
         */
        v3d_invalidate_caches(v3d);

        fence = v3d_fence_create(v3d, V3D_RENDER);
        if (IS_ERR(fence))
                return NULL;

        if (job->base.irq_fence)
                dma_fence_put(job->base.irq_fence);
        job->base.irq_fence = dma_fence_get(fence);

        trace_v3d_submit_cl(dev, true, to_v3d_fence(fence)->seqno,
                            job->start, job->end);

        /* XXX: Set the QCFG */

        /* Set the current and end address of the control list.
         * Writing the end register is what starts the job.
         */
        V3D_CORE_WRITE(0, V3D_CLE_CT1QBA, job->start);
        V3D_CORE_WRITE(0, V3D_CLE_CT1QEA, job->end);

        return fence;
}

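/* Kick off a TFU (texture formatting unit) job by programming its
 * register block; the final ICFG write starts the operation.
 */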
static struct dma_fence *
v3d_tfu_job_run(struct drm_sched_job *sched_job)
{
        struct v3d_tfu_job *job = to_tfu_job(sched_job);
        struct v3d_dev *v3d = job->base.v3d;
        struct drm_device *dev = &v3d->drm;
        struct dma_fence *fence;

        fence = v3d_fence_create(v3d, V3D_TFU);
        if (IS_ERR(fence))
                return NULL;

        v3d->tfu_job = job;
        if (job->base.irq_fence)
                dma_fence_put(job->base.irq_fence);
        job->base.irq_fence = dma_fence_get(fence);

        trace_v3d_submit_tfu(dev, to_v3d_fence(fence)->seqno);

        V3D_WRITE(V3D_TFU_IIA, job->args.iia);
        V3D_WRITE(V3D_TFU_IIS, job->args.iis);
        V3D_WRITE(V3D_TFU_ICA, job->args.ica);
        V3D_WRITE(V3D_TFU_IUA, job->args.iua);
        V3D_WRITE(V3D_TFU_IOA, job->args.ioa);
        V3D_WRITE(V3D_TFU_IOS, job->args.ios);
        V3D_WRITE(V3D_TFU_COEF0, job->args.coef[0]);
        if (job->args.coef[0] & V3D_TFU_COEF0_USECOEF) {
                V3D_WRITE(V3D_TFU_COEF1, job->args.coef[1]);
                V3D_WRITE(V3D_TFU_COEF2, job->args.coef[2]);
                V3D_WRITE(V3D_TFU_COEF3, job->args.coef[3]);
        }
        /* ICFG kicks off the job. */
        V3D_WRITE(V3D_TFU_ICFG, job->args.icfg | V3D_TFU_ICFG_IOC);

        return fence;
}

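/* Kick off a CSD (compute shader dispatch) job by writing its
 * queued-config registers; the final CFG0 write starts the dispatch.
 */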
static struct dma_fence *
v3d_csd_job_run(struct drm_sched_job *sched_job)
{
        struct v3d_csd_job *job = to_csd_job(sched_job);
        struct v3d_dev *v3d = job->base.v3d;
        struct drm_device *dev = &v3d->drm;
        struct dma_fence *fence;
        int i;

        v3d->csd_job = job;

        v3d_invalidate_caches(v3d);

        fence = v3d_fence_create(v3d, V3D_CSD);
        if (IS_ERR(fence))
                return NULL;

        if (job->base.irq_fence)
                dma_fence_put(job->base.irq_fence);
        job->base.irq_fence = dma_fence_get(fence);

        trace_v3d_submit_csd(dev, to_v3d_fence(fence)->seqno);

        for (i = 1; i <= 6; i++)
                V3D_CORE_WRITE(0, V3D_CSD_QUEUED_CFG0 + 4 * i, job->args.cfg[i]);
        /* CFG0 write kicks off the job. */
        V3D_CORE_WRITE(0, V3D_CSD_QUEUED_CFG0, job->args.cfg[0]);

        return fence;
}

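/* Cache-clean jobs run synchronously: the caches have been cleaned by
 * the time we return, so no hardware fence is needed and we return
 * NULL.
 */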
static struct dma_fence *
v3d_cache_clean_job_run(struct drm_sched_job *sched_job)
{
        struct v3d_job *job = to_v3d_job(sched_job);
        struct v3d_dev *v3d = job->v3d;

        v3d_clean_caches(v3d);

        return NULL;
}

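/* Full GPU reset path for a timed-out job: stop all the schedulers,
 * increase the offending job's karma, reset the hardware, then
 * resubmit the outstanding jobs and restart the schedulers.
 */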
static void
v3d_gpu_reset_for_timeout(struct v3d_dev *v3d, struct drm_sched_job *sched_job)
{
        enum v3d_queue q;

        mutex_lock(&v3d->reset_lock);

        /* block scheduler */
        for (q = 0; q < V3D_MAX_QUEUES; q++)
                drm_sched_stop(&v3d->queue[q].sched, sched_job);

        if (sched_job)
                drm_sched_increase_karma(sched_job);

        /* get the GPU back into the init state */
        v3d_reset(v3d);

        for (q = 0; q < V3D_MAX_QUEUES; q++)
                drm_sched_resubmit_jobs(&v3d->queue[q].sched);

        /* Unblock schedulers and restart their jobs. */
        for (q = 0; q < V3D_MAX_QUEUES; q++) {
                drm_sched_start(&v3d->queue[q].sched, true);
        }

        mutex_unlock(&v3d->reset_lock);
}

/* If the current address or return address have changed, then the GPU
 * has probably made progress and we should delay the reset.  This
 * could fail if the GPU got in an infinite loop in the CL, but that
 * is pretty unlikely outside of an i-g-t testcase.
 */
static void
v3d_cl_job_timedout(struct drm_sched_job *sched_job, enum v3d_queue q,
                    u32 *timedout_ctca, u32 *timedout_ctra)
{
        struct v3d_job *job = to_v3d_job(sched_job);
        struct v3d_dev *v3d = job->v3d;
        u32 ctca = V3D_CORE_READ(0, V3D_CLE_CTNCA(q));
        u32 ctra = V3D_CORE_READ(0, V3D_CLE_CTNRA(q));

        if (*timedout_ctca != ctca || *timedout_ctra != ctra) {
                *timedout_ctca = ctca;
                *timedout_ctra = ctra;
                return;
        }

        v3d_gpu_reset_for_timeout(v3d, sched_job);
}

static void
v3d_bin_job_timedout(struct drm_sched_job *sched_job)
{
        struct v3d_bin_job *job = to_bin_job(sched_job);

        v3d_cl_job_timedout(sched_job, V3D_BIN,
                            &job->timedout_ctca, &job->timedout_ctra);
}

static void
v3d_render_job_timedout(struct drm_sched_job *sched_job)
{
        struct v3d_render_job *job = to_render_job(sched_job);

        v3d_cl_job_timedout(sched_job, V3D_RENDER,
                            &job->timedout_ctca, &job->timedout_ctra);
}

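/* Timeout handler for queues with no progress counter to check: go
 * straight to a GPU reset.
 */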
static void
v3d_generic_job_timedout(struct drm_sched_job *sched_job)
{
        struct v3d_job *job = to_v3d_job(sched_job);

        v3d_gpu_reset_for_timeout(job->v3d, sched_job);
}

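/* For CSD, the remaining batch count in CURRENT_CFG4 serves as the
 * progress indicator: only reset if it hasn't moved since the last
 * timeout.
 */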
static void
v3d_csd_job_timedout(struct drm_sched_job *sched_job)
{
        struct v3d_csd_job *job = to_csd_job(sched_job);
        struct v3d_dev *v3d = job->base.v3d;
        u32 batches = V3D_CORE_READ(0, V3D_CSD_CURRENT_CFG4);

        /* If we've made progress, skip reset and let the timer get
         * rearmed.
         */
        if (job->timedout_batches != batches) {
                job->timedout_batches = batches;
                return;
        }

        v3d_gpu_reset_for_timeout(v3d, sched_job);
}

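/* Scheduler backend ops, one set per hardware queue.  They differ
 * only in their run and timeout callbacks.
 */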
static const struct drm_sched_backend_ops v3d_bin_sched_ops = {
        .dependency = v3d_job_dependency,
        .run_job = v3d_bin_job_run,
        .timedout_job = v3d_bin_job_timedout,
        .free_job = v3d_job_free,
};

static const struct drm_sched_backend_ops v3d_render_sched_ops = {
        .dependency = v3d_job_dependency,
        .run_job = v3d_render_job_run,
        .timedout_job = v3d_render_job_timedout,
        .free_job = v3d_job_free,
};

static const struct drm_sched_backend_ops v3d_tfu_sched_ops = {
        .dependency = v3d_job_dependency,
        .run_job = v3d_tfu_job_run,
        .timedout_job = v3d_generic_job_timedout,
        .free_job = v3d_job_free,
};

static const struct drm_sched_backend_ops v3d_csd_sched_ops = {
        .dependency = v3d_job_dependency,
        .run_job = v3d_csd_job_run,
        .timedout_job = v3d_csd_job_timedout,
        .free_job = v3d_job_free,
};

static const struct drm_sched_backend_ops v3d_cache_clean_sched_ops = {
        .dependency = v3d_job_dependency,
        .run_job = v3d_cache_clean_job_run,
        .timedout_job = v3d_generic_job_timedout,
        .free_job = v3d_job_free,
};

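/* Create one scheduler per hardware queue.  hw_jobs_limit of 1 keeps
 * a single job on the hardware at a time, matching the scheduling
 * policy described at the top of this file.
 */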
int
v3d_sched_init(struct v3d_dev *v3d)
{
        int hw_jobs_limit = 1;
        int job_hang_limit = 0;
        int hang_limit_ms = 500;
        int ret;

        ret = drm_sched_init(&v3d->queue[V3D_BIN].sched,
                             &v3d_bin_sched_ops,
                             hw_jobs_limit, job_hang_limit,
                             msecs_to_jiffies(hang_limit_ms),
                             "v3d_bin");
        if (ret) {
                dev_err(v3d->dev, "Failed to create bin scheduler: %d.", ret);
                return ret;
        }

        ret = drm_sched_init(&v3d->queue[V3D_RENDER].sched,
                             &v3d_render_sched_ops,
                             hw_jobs_limit, job_hang_limit,
                             msecs_to_jiffies(hang_limit_ms),
                             "v3d_render");
        if (ret) {
                dev_err(v3d->dev, "Failed to create render scheduler: %d.",
                        ret);
                v3d_sched_fini(v3d);
                return ret;
        }

        ret = drm_sched_init(&v3d->queue[V3D_TFU].sched,
                             &v3d_tfu_sched_ops,
                             hw_jobs_limit, job_hang_limit,
                             msecs_to_jiffies(hang_limit_ms),
                             "v3d_tfu");
        if (ret) {
                dev_err(v3d->dev, "Failed to create TFU scheduler: %d.",
                        ret);
                v3d_sched_fini(v3d);
                return ret;
        }

        if (v3d_has_csd(v3d)) {
                ret = drm_sched_init(&v3d->queue[V3D_CSD].sched,
                                     &v3d_csd_sched_ops,
                                     hw_jobs_limit, job_hang_limit,
                                     msecs_to_jiffies(hang_limit_ms),
                                     "v3d_csd");
                if (ret) {
                        dev_err(v3d->dev, "Failed to create CSD scheduler: %d.",
                                ret);
                        v3d_sched_fini(v3d);
                        return ret;
                }

                ret = drm_sched_init(&v3d->queue[V3D_CACHE_CLEAN].sched,
                                     &v3d_cache_clean_sched_ops,
                                     hw_jobs_limit, job_hang_limit,
                                     msecs_to_jiffies(hang_limit_ms),
                                     "v3d_cache_clean");
                if (ret) {
                        dev_err(v3d->dev, "Failed to create CACHE_CLEAN scheduler: %d.",
                                ret);
                        v3d_sched_fini(v3d);
                        return ret;
                }
        }

        return 0;
}

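/* Tear down any schedulers that were initialized; the .ready check
 * lets this also be used to clean up after a partial
 * v3d_sched_init() failure.
 */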
void
v3d_sched_fini(struct v3d_dev *v3d)
{
        enum v3d_queue q;

        for (q = 0; q < V3D_MAX_QUEUES; q++) {
                if (v3d->queue[q].sched.ready)
                        drm_sched_fini(&v3d->queue[q].sched);
        }
}