Merge drm/drm-next into drm-misc-next
[linux-2.6-block.git] / drivers / gpu / drm / nouveau / nouveau_sched.c
1 // SPDX-License-Identifier: MIT
2
3 #include <linux/slab.h>
4 #include <drm/gpu_scheduler.h>
5 #include <drm/drm_syncobj.h>
6
7 #include "nouveau_drv.h"
8 #include "nouveau_gem.h"
9 #include "nouveau_mem.h"
10 #include "nouveau_dma.h"
11 #include "nouveau_exec.h"
12 #include "nouveau_abi16.h"
13 #include "nouveau_sched.h"
14
15 /* FIXME
16  *
17  * We want to make sure that jobs currently executing can't be deferred by
18  * other jobs competing for the hardware. Otherwise we might end up with job
19  * timeouts just because of too many clients submitting too many jobs. We don't
20  * want jobs to time out because of system load, but because of the job being
21  * too bulky.
22  *
23  * For now allow for up to 16 concurrent jobs in flight until we know how many
24  * rings the hardware can process in parallel.
25  */
26 #define NOUVEAU_SCHED_HW_SUBMISSIONS            16
27 #define NOUVEAU_SCHED_JOB_TIMEOUT_MS            10000
28
/* Initialize a job from the submission arguments in @args.
 *
 * Copies the userspace-provided in-/out-sync arrays into kernel memory
 * and binds the job to its scheduler entity via drm_sched_job_init().
 * On success the job is left in state NOUVEAU_JOB_INITIALIZED; on error
 * every allocation made here is unwound and a negative errno is returned.
 *
 * Note: a synchronous job (args->sync) may not carry any sync objects;
 * such a combination is rejected with -EINVAL.
 */
int
nouveau_job_init(struct nouveau_job *job,
		 struct nouveau_job_args *args)
{
	struct nouveau_sched_entity *entity = args->sched_entity;
	int ret;

	job->file_priv = args->file_priv;
	job->cli = nouveau_cli(args->file_priv);
	job->entity = entity;

	job->sync = args->sync;
	job->resv_usage = args->resv_usage;

	job->ops = args->ops;

	job->in_sync.count = args->in_sync.count;
	if (job->in_sync.count) {
		/* Sync submission and in-fences are mutually exclusive. */
		if (job->sync)
			return -EINVAL;

		/* Snapshot the userspace sync array; freed in
		 * nouveau_job_free().
		 */
		job->in_sync.data = kmemdup(args->in_sync.s,
					 sizeof(*args->in_sync.s) *
					 args->in_sync.count,
					 GFP_KERNEL);
		if (!job->in_sync.data)
			return -ENOMEM;
	}

	job->out_sync.count = args->out_sync.count;
	if (job->out_sync.count) {
		/* Sync submission and out-fences are mutually exclusive. */
		if (job->sync) {
			ret = -EINVAL;
			goto err_free_in_sync;
		}

		job->out_sync.data = kmemdup(args->out_sync.s,
					  sizeof(*args->out_sync.s) *
					  args->out_sync.count,
					  GFP_KERNEL);
		if (!job->out_sync.data) {
			ret = -ENOMEM;
			goto err_free_in_sync;
		}

		/* Zero-initialized slots let the fence-attach cleanup path
		 * distinguish acquired objects from never-touched ones.
		 */
		job->out_sync.objs = kcalloc(job->out_sync.count,
					     sizeof(*job->out_sync.objs),
					     GFP_KERNEL);
		if (!job->out_sync.objs) {
			ret = -ENOMEM;
			goto err_free_out_sync;
		}

		job->out_sync.chains = kcalloc(job->out_sync.count,
					       sizeof(*job->out_sync.chains),
					       GFP_KERNEL);
		if (!job->out_sync.chains) {
			ret = -ENOMEM;
			goto err_free_objs;
		}

	}

	ret = drm_sched_job_init(&job->base, &entity->base, 1, NULL);
	if (ret)
		goto err_free_chains;

	job->state = NOUVEAU_JOB_INITIALIZED;

	return 0;

err_free_chains:
	kfree(job->out_sync.chains);
err_free_objs:
	kfree(job->out_sync.objs);
err_free_out_sync:
	kfree(job->out_sync.data);
err_free_in_sync:
	kfree(job->in_sync.data);
	return ret;
}
110
111 void
112 nouveau_job_free(struct nouveau_job *job)
113 {
114         kfree(job->in_sync.data);
115         kfree(job->out_sync.data);
116         kfree(job->out_sync.objs);
117         kfree(job->out_sync.chains);
118 }
119
/* Final teardown of a job, called once the scheduler is done with it
 * (see nouveau_sched_free_job()).
 *
 * Drops the done_fence reference taken in nouveau_job_submit(), detaches
 * the job from the scheduler, and finally hands ownership back to the
 * job type's free() callback, which releases the job itself.
 */
void nouveau_job_fini(struct nouveau_job *job)
{
	dma_fence_put(job->done_fence);
	drm_sched_job_cleanup(&job->base);
	job->ops->free(job);
}
126
127 static int
128 sync_find_fence(struct nouveau_job *job,
129                 struct drm_nouveau_sync *sync,
130                 struct dma_fence **fence)
131 {
132         u32 stype = sync->flags & DRM_NOUVEAU_SYNC_TYPE_MASK;
133         u64 point = 0;
134         int ret;
135
136         if (stype != DRM_NOUVEAU_SYNC_SYNCOBJ &&
137             stype != DRM_NOUVEAU_SYNC_TIMELINE_SYNCOBJ)
138                 return -EOPNOTSUPP;
139
140         if (stype == DRM_NOUVEAU_SYNC_TIMELINE_SYNCOBJ)
141                 point = sync->timeline_value;
142
143         ret = drm_syncobj_find_fence(job->file_priv,
144                                      sync->handle, point,
145                                      0 /* flags */, fence);
146         if (ret)
147                 return ret;
148
149         return 0;
150 }
151
152 static int
153 nouveau_job_add_deps(struct nouveau_job *job)
154 {
155         struct dma_fence *in_fence = NULL;
156         int ret, i;
157
158         for (i = 0; i < job->in_sync.count; i++) {
159                 struct drm_nouveau_sync *sync = &job->in_sync.data[i];
160
161                 ret = sync_find_fence(job, sync, &in_fence);
162                 if (ret) {
163                         NV_PRINTK(warn, job->cli,
164                                   "Failed to find syncobj (-> in): handle=%d\n",
165                                   sync->handle);
166                         return ret;
167                 }
168
169                 ret = drm_sched_job_add_dependency(&job->base, in_fence);
170                 if (ret)
171                         return ret;
172         }
173
174         return 0;
175 }
176
177 static void
178 nouveau_job_fence_attach_cleanup(struct nouveau_job *job)
179 {
180         int i;
181
182         for (i = 0; i < job->out_sync.count; i++) {
183                 struct drm_syncobj *obj = job->out_sync.objs[i];
184                 struct dma_fence_chain *chain = job->out_sync.chains[i];
185
186                 if (obj)
187                         drm_syncobj_put(obj);
188
189                 if (chain)
190                         dma_fence_chain_free(chain);
191         }
192 }
193
/* Pre-acquire everything needed to signal the job's out-syncs.
 *
 * For each out-sync item, take a reference on its syncobj and, for
 * timeline syncobjs, pre-allocate a fence chain node. Doing this before
 * submission guarantees that nouveau_job_fence_attach() cannot fail
 * after the job has been armed.
 *
 * On any error, all references/allocations acquired so far are dropped
 * via nouveau_job_fence_attach_cleanup() (safe because the objs/chains
 * arrays were zero-initialized in nouveau_job_init()).
 */
static int
nouveau_job_fence_attach_prepare(struct nouveau_job *job)
{
	int i, ret;

	for (i = 0; i < job->out_sync.count; i++) {
		struct drm_nouveau_sync *sync = &job->out_sync.data[i];
		struct drm_syncobj **pobj = &job->out_sync.objs[i];
		struct dma_fence_chain **pchain = &job->out_sync.chains[i];
		u32 stype = sync->flags & DRM_NOUVEAU_SYNC_TYPE_MASK;

		/* Only (timeline) syncobjs are valid as out-fences. */
		if (stype != DRM_NOUVEAU_SYNC_SYNCOBJ &&
		    stype != DRM_NOUVEAU_SYNC_TIMELINE_SYNCOBJ) {
			ret = -EINVAL;
			goto err_sync_cleanup;
		}

		*pobj = drm_syncobj_find(job->file_priv, sync->handle);
		if (!*pobj) {
			NV_PRINTK(warn, job->cli,
				  "Failed to find syncobj (-> out): handle=%d\n",
				  sync->handle);
			ret = -ENOENT;
			goto err_sync_cleanup;
		}

		/* Timeline points need a chain node; allocate it up front
		 * so the attach path can't hit -ENOMEM.
		 */
		if (stype == DRM_NOUVEAU_SYNC_TIMELINE_SYNCOBJ) {
			*pchain = dma_fence_chain_alloc();
			if (!*pchain) {
				ret = -ENOMEM;
				goto err_sync_cleanup;
			}
		}
	}

	return 0;

err_sync_cleanup:
	nouveau_job_fence_attach_cleanup(job);
	return ret;
}
235
236 static void
237 nouveau_job_fence_attach(struct nouveau_job *job)
238 {
239         struct dma_fence *fence = job->done_fence;
240         int i;
241
242         for (i = 0; i < job->out_sync.count; i++) {
243                 struct drm_nouveau_sync *sync = &job->out_sync.data[i];
244                 struct drm_syncobj **pobj = &job->out_sync.objs[i];
245                 struct dma_fence_chain **pchain = &job->out_sync.chains[i];
246                 u32 stype = sync->flags & DRM_NOUVEAU_SYNC_TYPE_MASK;
247
248                 if (stype == DRM_NOUVEAU_SYNC_TIMELINE_SYNCOBJ) {
249                         drm_syncobj_add_point(*pobj, *pchain, fence,
250                                               sync->timeline_value);
251                 } else {
252                         drm_syncobj_replace_fence(*pobj, fence);
253                 }
254
255                 drm_syncobj_put(*pobj);
256                 *pobj = NULL;
257                 *pchain = NULL;
258         }
259 }
260
/* Submit an initialized job to its scheduler entity.
 *
 * Resolves in-fences, pre-acquires out-fence resources, then — under the
 * entity mutex to keep submission order — runs the type-specific
 * submit() callback, arms the scheduler job and pushes it. For a
 * synchronous job (job->sync) this also waits for completion before
 * returning.
 *
 * Returns 0 on success; on failure the job is marked
 * NOUVEAU_JOB_SUBMIT_FAILED and an errno is returned.
 */
int
nouveau_job_submit(struct nouveau_job *job)
{
	struct nouveau_sched_entity *entity = to_nouveau_sched_entity(job->base.entity);
	struct dma_fence *done_fence = NULL;
	int ret;

	ret = nouveau_job_add_deps(job);
	if (ret)
		goto err;

	/* Acquire everything fallible now, so nothing can fail once
	 * job->ops->submit() has returned successfully.
	 */
	ret = nouveau_job_fence_attach_prepare(job);
	if (ret)
		goto err;

	/* Make sure the job appears on the sched_entity's queue in the same
	 * order as it was submitted.
	 */
	mutex_lock(&entity->mutex);

	/* Guarantee we won't fail after the submit() callback returned
	 * successfully.
	 */
	if (job->ops->submit) {
		ret = job->ops->submit(job);
		if (ret)
			goto err_cleanup;
	}

	drm_sched_job_arm(&job->base);
	job->done_fence = dma_fence_get(&job->base.s_fence->finished);
	if (job->sync)
		done_fence = dma_fence_get(job->done_fence);

	/* If a sched job depends on a dma-fence from a job from the same GPU
	 * scheduler instance, but a different scheduler entity, the GPU
	 * scheduler does only wait for the particular job to be scheduled,
	 * rather than for the job to fully complete. This is due to the GPU
	 * scheduler assuming that there is a scheduler instance per ring.
	 * However, the current implementation, in order to avoid arbitrary
	 * amounts of kthreads, has a single scheduler instance while scheduler
	 * entities represent rings.
	 *
	 * As a workaround, set the DRM_SCHED_FENCE_DONT_PIPELINE for all
	 * out-fences in order to force the scheduler to wait for full job
	 * completion for dependent jobs from different entities and same
	 * scheduler instance.
	 *
	 * There is some work in progress [1] to address the issues of firmware
	 * schedulers; once it is in-tree the scheduler topology in Nouveau
	 * should be re-worked accordingly.
	 *
	 * [1] https://lore.kernel.org/dri-devel/20230801205103.627779-1-matthew.brost@intel.com/
	 */
	set_bit(DRM_SCHED_FENCE_DONT_PIPELINE, &job->done_fence->flags);

	if (job->ops->armed_submit)
		job->ops->armed_submit(job);

	nouveau_job_fence_attach(job);

	/* Set job state before pushing the job to the scheduler,
	 * such that we do not overwrite the job state set in run().
	 */
	job->state = NOUVEAU_JOB_SUBMIT_SUCCESS;

	drm_sched_entity_push_job(&job->base);

	mutex_unlock(&entity->mutex);

	/* Synchronous submission: block until the job signals completion. */
	if (done_fence) {
		dma_fence_wait(done_fence, true);
		dma_fence_put(done_fence);
	}

	return 0;

err_cleanup:
	mutex_unlock(&entity->mutex);
	nouveau_job_fence_attach_cleanup(job);
err:
	job->state = NOUVEAU_JOB_SUBMIT_FAILED;
	return ret;
}
345
346 bool
347 nouveau_sched_entity_qwork(struct nouveau_sched_entity *entity,
348                            struct work_struct *work)
349 {
350         return queue_work(entity->sched_wq, work);
351 }
352
353 static struct dma_fence *
354 nouveau_job_run(struct nouveau_job *job)
355 {
356         struct dma_fence *fence;
357
358         fence = job->ops->run(job);
359         if (IS_ERR(fence))
360                 job->state = NOUVEAU_JOB_RUN_FAILED;
361         else
362                 job->state = NOUVEAU_JOB_RUN_SUCCESS;
363
364         return fence;
365 }
366
/* drm_sched_backend_ops::run_job entry point. */
static struct dma_fence *
nouveau_sched_run_job(struct drm_sched_job *sched_job)
{
	return nouveau_job_run(to_nouveau_job(sched_job));
}
374
/* drm_sched_backend_ops::timedout_job — called when a job exceeds the
 * scheduler's timeout.
 *
 * The scheduler is stopped around the handling so the job type's own
 * timeout() callback (if any) runs without concurrent scheduling; with
 * no callback we only log a warning and report DRM_GPU_SCHED_STAT_NOMINAL.
 */
static enum drm_gpu_sched_stat
nouveau_sched_timedout_job(struct drm_sched_job *sched_job)
{
	struct drm_gpu_scheduler *sched = sched_job->sched;
	struct nouveau_job *job = to_nouveau_job(sched_job);
	enum drm_gpu_sched_stat stat = DRM_GPU_SCHED_STAT_NOMINAL;

	drm_sched_stop(sched, sched_job);

	if (job->ops->timeout)
		stat = job->ops->timeout(job);
	else
		NV_PRINTK(warn, job->cli, "Generic job timeout.\n");

	drm_sched_start(sched, true);

	return stat;
}
393
/* drm_sched_backend_ops::free_job entry point — final job teardown. */
static void
nouveau_sched_free_job(struct drm_sched_job *sched_job)
{
	nouveau_job_fini(to_nouveau_job(sched_job));
}
401
/* Initialize a nouveau scheduler entity on top of @sched.
 *
 * Sets up the submission-ordering mutex, the entity's job list and wait
 * queue, records the workqueue used by nouveau_sched_entity_qwork(), and
 * registers the entity with the DRM GPU scheduler at normal priority.
 * Returns the drm_sched_entity_init() result.
 */
int nouveau_sched_entity_init(struct nouveau_sched_entity *entity,
			      struct drm_gpu_scheduler *sched,
			      struct workqueue_struct *sched_wq)
{
	mutex_init(&entity->mutex);
	spin_lock_init(&entity->job.list.lock);
	INIT_LIST_HEAD(&entity->job.list.head);
	init_waitqueue_head(&entity->job.wq);

	entity->sched_wq = sched_wq;
	return drm_sched_entity_init(&entity->base,
				     DRM_SCHED_PRIORITY_NORMAL,
				     &sched, 1, NULL);
}
416
/* Tear down a scheduler entity; counterpart of nouveau_sched_entity_init(). */
void
nouveau_sched_entity_fini(struct nouveau_sched_entity *entity)
{
	drm_sched_entity_destroy(&entity->base);
}
422
/* Backend callbacks wiring the nouveau job lifecycle into the DRM GPU
 * scheduler.
 */
static const struct drm_sched_backend_ops nouveau_sched_ops = {
	.run_job = nouveau_sched_run_job,
	.timedout_job = nouveau_sched_timedout_job,
	.free_job = nouveau_sched_free_job,
};
428
429 int nouveau_sched_init(struct nouveau_drm *drm)
430 {
431         struct drm_gpu_scheduler *sched = &drm->sched;
432         long job_hang_limit = msecs_to_jiffies(NOUVEAU_SCHED_JOB_TIMEOUT_MS);
433
434         drm->sched_wq = create_singlethread_workqueue("nouveau_sched_wq");
435         if (!drm->sched_wq)
436                 return -ENOMEM;
437
438         return drm_sched_init(sched, &nouveau_sched_ops, NULL,
439                               DRM_SCHED_PRIORITY_COUNT,
440                               NOUVEAU_SCHED_HW_SUBMISSIONS, 0, job_hang_limit,
441                               NULL, NULL, "nouveau_sched", drm->dev->dev);
442 }
443
/* Tear down the per-device scheduler state set up by nouveau_sched_init().
 *
 * The workqueue is destroyed first (destroy_workqueue() drains pending
 * work), then the GPU scheduler itself is finalized.
 */
void nouveau_sched_fini(struct nouveau_drm *drm)
{
	destroy_workqueue(drm->sched_wq);
	drm_sched_fini(&drm->sched);
}