// SPDX-License-Identifier: MIT

#include <drm/drm_exec.h>

#include "nouveau_drv.h"
#include "nouveau_gem.h"
#include "nouveau_mem.h"
#include "nouveau_dma.h"
#include "nouveau_exec.h"
#include "nouveau_abi16.h"
#include "nouveau_chan.h"
#include "nouveau_sched.h"
#include "nouveau_uvmm.h"

/**
 * DOC: Overview
 *
 * Nouveau's VM_BIND / EXEC UAPI consists of three ioctls: DRM_NOUVEAU_VM_INIT,
 * DRM_NOUVEAU_VM_BIND and DRM_NOUVEAU_EXEC.
 *
 * In order to use the UAPI, a user client must first initialize the VA space
 * using the DRM_NOUVEAU_VM_INIT ioctl, specifying which region of the VA space
 * should be managed by the kernel and which by the UMD.
 *
 * The DRM_NOUVEAU_VM_BIND ioctl provides clients an interface to manage the
 * userspace-manageable portion of the VA space. It provides operations to map
 * and unmap memory. Mappings may be flagged as sparse. Sparse mappings are not
 * backed by a GEM object and the kernel will ignore GEM handles provided
 * alongside a sparse mapping.
 *
 * Userspace may request memory backed mappings either within or outside of the
 * bounds (but not crossing those bounds) of a previously mapped sparse
 * mapping. Subsequently requested memory backed mappings within a sparse
 * mapping will take precedence over the corresponding range of the sparse
 * mapping. If such memory backed mappings are unmapped the kernel will make
 * sure that the corresponding sparse mapping takes their place again.
 * Requests to unmap a sparse mapping that still contains memory backed
 * mappings will result in those memory backed mappings being unmapped first.
 *
 * Unmap requests are not bound to the range of existing mappings and can even
 * overlap the bounds of sparse mappings. For such a request the kernel will
 * make sure to unmap all memory backed mappings within the given range,
 * splitting up memory backed mappings which are only partially contained
 * within the given range. Unmap requests with the sparse flag set, however,
 * must match the range of a previously mapped sparse mapping exactly.
 *
 * While the kernel generally permits arbitrary sequences and ranges of memory
 * backed mappings being mapped and unmapped, either within a single or across
 * multiple VM_BIND ioctl calls, there are some restrictions for sparse
 * mappings.
 *
 * The kernel does not permit userspace to:
 *   - unmap non-existent sparse mappings
 *   - unmap a sparse mapping and map a new sparse mapping overlapping the
 *     range of the previously unmapped sparse mapping within the same VM_BIND
 *     ioctl
 *   - unmap a sparse mapping and map new memory backed mappings overlapping
 *     the range of the previously unmapped sparse mapping within the same
 *     VM_BIND ioctl
 *
 * When using the VM_BIND ioctl to request the kernel to map memory to a given
 * virtual address in the GPU's VA space there is no guarantee that the actual
 * mappings are created in the GPU's MMU. If the given memory is swapped out
 * at the time the bind operation is executed the kernel will stash the mapping
 * details into its internal allocator and create the actual MMU mappings once
 * the memory is swapped back in. While this is transparent for userspace, it
 * is guaranteed that all the backing memory is swapped back in and all the
 * memory mappings, as requested by userspace previously, are actually mapped
 * once the DRM_NOUVEAU_EXEC ioctl is called to submit an exec job.
 *
 * A VM_BIND job can be executed either synchronously or asynchronously. If
 * executed asynchronously, userspace may provide a list of syncobjs this job
 * will wait for and/or a list of syncobjs the kernel will signal once the
 * VM_BIND job finished execution. If executed synchronously the ioctl will
 * block until the bind job is finished; for synchronous jobs the kernel does
 * not permit any syncobjs to be submitted.
 *
 * To execute a push buffer the UAPI provides the DRM_NOUVEAU_EXEC ioctl. EXEC
 * jobs are always executed asynchronously and, like VM_BIND jobs, provide the
 * option to synchronize them with syncobjs.
 *
 * Besides that, EXEC jobs can be scheduled for a specified channel to execute
 * on.
 *
 * Since VM_BIND jobs update the GPU's VA space on job submit, EXEC jobs have
 * an up-to-date view of the VA space. However, the actual mappings might
 * still be pending. Hence, EXEC jobs require the fences of the corresponding
 * VM_BIND jobs they depend on to be attached to them. A minimal userspace
 * usage sketch follows this comment block.
 */
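
/* A minimal userspace usage sketch of the UAPI described above, illustrative
 * only. It assumes a DRM fd, a GEM handle (bo_handle) and a channel
 * (channel_id) already exist; sparse_va, sparse_size, bo_size, pushbuf_va and
 * pushbuf_len are placeholders. Struct and flag names are quoted from
 * include/uapi/drm/nouveau_drm.h to the best of the author's knowledge and
 * should be verified against the header.
 *
 *	struct drm_nouveau_vm_init init = {
 *		.kernel_managed_addr = 0,
 *		.kernel_managed_size = 1ull << 30, // kernel manages first 1 GiB
 *	};
 *	drmIoctl(fd, DRM_IOCTL_NOUVEAU_VM_INIT, &init);
 *
 *	// Reserve a sparse region, then back part of it with a GEM object.
 *	struct drm_nouveau_vm_bind_op ops[2] = {
 *		{
 *			.op = DRM_NOUVEAU_VM_BIND_OP_MAP,
 *			.flags = DRM_NOUVEAU_VM_BIND_SPARSE,
 *			.addr = sparse_va,
 *			.range = sparse_size,
 *		},
 *		{
 *			.op = DRM_NOUVEAU_VM_BIND_OP_MAP,
 *			.handle = bo_handle,
 *			.addr = sparse_va,
 *			.bo_offset = 0,
 *			.range = bo_size,
 *		},
 *	};
 *	struct drm_nouveau_vm_bind bind = {
 *		.op_count = 2,
 *		.op_ptr = (__u64)(uintptr_t)ops,
 *		// no DRM_NOUVEAU_VM_BIND_RUN_ASYNC flag: the ioctl blocks
 *		// until the bind job has finished
 *	};
 *	drmIoctl(fd, DRM_IOCTL_NOUVEAU_VM_BIND, &bind);
 *
 *	// Submit a push buffer on the channel; EXEC is always asynchronous.
 *	struct drm_nouveau_exec_push push = {
 *		.va = pushbuf_va,
 *		.va_len = pushbuf_len,
 *	};
 *	struct drm_nouveau_exec exec = {
 *		.channel = channel_id,
 *		.push_count = 1,
 *		.push_ptr = (__u64)(uintptr_t)&push,
 *	};
 *	drmIoctl(fd, DRM_IOCTL_NOUVEAU_EXEC, &exec);
 */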

static int
nouveau_exec_job_submit(struct nouveau_job *job)
{
	struct nouveau_exec_job *exec_job = to_nouveau_exec_job(job);
	struct nouveau_cli *cli = job->cli;
	struct nouveau_uvmm *uvmm = nouveau_cli_uvmm(cli);
	struct drm_exec *exec = &job->exec;
	struct drm_gem_object *obj;
	unsigned long index;
	int ret;

	/* Create a new fence, but do not emit yet. */
	ret = nouveau_fence_create(&exec_job->fence, exec_job->chan);
	if (ret)
		return ret;

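	/* Reserve the GEM object of every mapping in this VM, so the job's
	 * fence can be attached to them later on;
	 * drm_exec_retry_on_contention() restarts the locking loop if
	 * reservation hits ww-mutex contention.
	 */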
	nouveau_uvmm_lock(uvmm);
	drm_exec_init(exec, DRM_EXEC_INTERRUPTIBLE_WAIT |
			    DRM_EXEC_IGNORE_DUPLICATES);
	drm_exec_until_all_locked(exec) {
		struct drm_gpuva *va;

		drm_gpuva_for_each_va(va, &uvmm->umgr) {
			if (unlikely(va == &uvmm->umgr.kernel_alloc_node))
				continue;

			ret = drm_exec_prepare_obj(exec, va->gem.obj, 1);
			drm_exec_retry_on_contention(exec);
			if (ret)
				goto err_uvmm_unlock;
		}
	}
	nouveau_uvmm_unlock(uvmm);

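	/* With all objects reserved, make their backing memory resident
	 * before the job is pushed to the scheduler.
	 */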
	drm_exec_for_each_locked_object(exec, index, obj) {
		struct nouveau_bo *nvbo = nouveau_gem_object(obj);

		ret = nouveau_bo_validate(nvbo, true, false);
		if (ret)
			goto err_exec_fini;
	}

	return 0;

err_uvmm_unlock:
	nouveau_uvmm_unlock(uvmm);
err_exec_fini:
	drm_exec_fini(exec);
	return ret;
}

static void
nouveau_exec_job_armed_submit(struct nouveau_job *job)
{
	struct drm_exec *exec = &job->exec;
	struct drm_gem_object *obj;
	unsigned long index;

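	/* Attach the job's done fence to every object reserved in submit(),
	 * then drop the reservation locks.
	 */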
	drm_exec_for_each_locked_object(exec, index, obj)
		dma_resv_add_fence(obj->resv, job->done_fence, job->resv_usage);

	drm_exec_fini(exec);
}

static struct dma_fence *
nouveau_exec_job_run(struct nouveau_job *job)
{
	struct nouveau_exec_job *exec_job = to_nouveau_exec_job(job);
	struct nouveau_channel *chan = exec_job->chan;
	struct nouveau_fence *fence = exec_job->fence;
	int i, ret;

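	/* Wait for ring space: one IB slot per push buffer plus one for the
	 * subsequent fence emission.
	 */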
	ret = nouveau_dma_wait(chan, exec_job->push.count + 1, 16);
	if (ret) {
		NV_PRINTK(err, job->cli, "nv50cal_space: %d\n", ret);
		return ERR_PTR(ret);
	}

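	/* Queue each userspace push buffer on the channel's IB ring. */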
	for (i = 0; i < exec_job->push.count; i++) {
		struct drm_nouveau_exec_push *p = &exec_job->push.s[i];
		bool no_prefetch = p->flags & DRM_NOUVEAU_EXEC_PUSH_NO_PREFETCH;

		nv50_dma_push(chan, p->va, p->va_len, no_prefetch);
	}

	ret = nouveau_fence_emit(fence);
	if (ret) {
		nouveau_fence_unref(&exec_job->fence);
		NV_PRINTK(err, job->cli, "error fencing pushbuf: %d\n", ret);
		WIND_RING(chan);
		return ERR_PTR(ret);
	}

	/* The fence was emitted successfully; set the job's fence pointer to
	 * NULL in order to avoid freeing it up when the job is cleaned up.
	 */
	exec_job->fence = NULL;

	return &fence->base;
}

static void
nouveau_exec_job_free(struct nouveau_job *job)
{
	struct nouveau_exec_job *exec_job = to_nouveau_exec_job(job);

	nouveau_job_free(job);

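	/* The fence is only non-NULL if it was never emitted, e.g. because
	 * job submission failed; nouveau_exec_job_run() clears the pointer
	 * on success.
	 */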
	kfree(exec_job->fence);
	kfree(exec_job->push.s);
	kfree(exec_job);
}

static enum drm_gpu_sched_stat
nouveau_exec_job_timeout(struct nouveau_job *job)
{
	struct nouveau_exec_job *exec_job = to_nouveau_exec_job(job);
	struct nouveau_channel *chan = exec_job->chan;

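	/* An exec job timing out is fatal for the channel; kill it if that
	 * did not happen already.
	 */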
	if (unlikely(!atomic_read(&chan->killed)))
		nouveau_channel_kill(chan);

	NV_PRINTK(warn, job->cli, "job timeout, channel %d killed!\n",
		  chan->chid);

	nouveau_sched_entity_fini(job->entity);

	return DRM_GPU_SCHED_STAT_NOMINAL;
}

static struct nouveau_job_ops nouveau_exec_job_ops = {
	.submit = nouveau_exec_job_submit,
	.armed_submit = nouveau_exec_job_armed_submit,
	.run = nouveau_exec_job_run,
	.free = nouveau_exec_job_free,
	.timeout = nouveau_exec_job_timeout,
};

int
nouveau_exec_job_init(struct nouveau_exec_job **pjob,
		      struct nouveau_exec_job_args *__args)
{
	struct nouveau_exec_job *job;
	struct nouveau_job_args args = {};
	int i, ret;

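	/* Reject push buffers exceeding the hardware limit before any
	 * allocations are made.
	 */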
	for (i = 0; i < __args->push.count; i++) {
		struct drm_nouveau_exec_push *p = &__args->push.s[i];

		if (unlikely(p->va_len > NV50_DMA_PUSH_MAX_LENGTH)) {
			NV_PRINTK(err, nouveau_cli(__args->file_priv),
				  "pushbuf size exceeds limit: 0x%x max 0x%x\n",
				  p->va_len, NV50_DMA_PUSH_MAX_LENGTH);
			return -EINVAL;
		}
	}

	job = *pjob = kzalloc(sizeof(*job), GFP_KERNEL);
	if (!job)
		return -ENOMEM;

	job->push.count = __args->push.count;
	if (__args->push.count) {
		job->push.s = kmemdup(__args->push.s,
				      sizeof(*__args->push.s) *
				      __args->push.count,
				      GFP_KERNEL);
		if (!job->push.s) {
			ret = -ENOMEM;
			goto err_free_job;
		}
	}

	job->chan = __args->chan;

	args.sched_entity = __args->sched_entity;
	args.file_priv = __args->file_priv;

	args.in_sync.count = __args->in_sync.count;
	args.in_sync.s = __args->in_sync.s;

	args.out_sync.count = __args->out_sync.count;
	args.out_sync.s = __args->out_sync.s;

	args.ops = &nouveau_exec_job_ops;
	args.resv_usage = DMA_RESV_USAGE_WRITE;

	ret = nouveau_job_init(&job->base, &args);
	if (ret)
		goto err_free_pushs;

	return 0;

err_free_pushs:
	kfree(job->push.s);
err_free_job:
	kfree(job);
	*pjob = NULL;

	return ret;
}

static int
nouveau_exec(struct nouveau_exec_job_args *args)
{
	struct nouveau_exec_job *job;
	int ret;

	ret = nouveau_exec_job_init(&job, args);
	if (ret)
		return ret;

	ret = nouveau_job_submit(&job->base);
	if (ret)
		goto err_job_fini;

	return 0;

err_job_fini:
	nouveau_job_fini(&job->base);
	return ret;
}

static int
nouveau_exec_ucopy(struct nouveau_exec_job_args *args,
		   struct drm_nouveau_exec *req)
{
	struct drm_nouveau_sync **s;
	u32 inc = req->wait_count;
	u64 ins = req->wait_ptr;
	u32 outc = req->sig_count;
	u64 outs = req->sig_ptr;
	u32 pushc = req->push_count;
	u64 pushs = req->push_ptr;
	int ret;

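	/* Duplicate the userspace arrays (push buffers, wait and signal
	 * syncobjs) into kernel memory.
	 */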
	if (pushc) {
		args->push.count = pushc;
		args->push.s = u_memcpya(pushs, pushc, sizeof(*args->push.s));
		if (IS_ERR(args->push.s))
			return PTR_ERR(args->push.s);
	}

	if (inc) {
		s = &args->in_sync.s;

		args->in_sync.count = inc;
		*s = u_memcpya(ins, inc, sizeof(**s));
		if (IS_ERR(*s)) {
			ret = PTR_ERR(*s);
			goto err_free_pushs;
		}
	}

	if (outc) {
		s = &args->out_sync.s;

		args->out_sync.count = outc;
		*s = u_memcpya(outs, outc, sizeof(**s));
		if (IS_ERR(*s)) {
			ret = PTR_ERR(*s);
			goto err_free_ins;
		}
	}

	return 0;

err_free_ins:
	u_free(args->in_sync.s);
err_free_pushs:
	u_free(args->push.s);
	return ret;
}

static void
nouveau_exec_ufree(struct nouveau_exec_job_args *args)
{
	u_free(args->push.s);
	u_free(args->in_sync.s);
	u_free(args->out_sync.s);
}

int
nouveau_exec_ioctl_exec(struct drm_device *dev,
			void *data,
			struct drm_file *file_priv)
{
	struct nouveau_abi16 *abi16 = nouveau_abi16_get(file_priv);
	struct nouveau_cli *cli = nouveau_cli(file_priv);
	struct nouveau_abi16_chan *chan16;
	struct nouveau_channel *chan = NULL;
	struct nouveau_exec_job_args args = {};
	struct drm_nouveau_exec *req = data;
	int push_max, ret = 0;

	if (unlikely(!abi16))
		return -ENOMEM;

	/* abi16 is locked already by nouveau_abi16_get(). */
	if (unlikely(!nouveau_cli_uvmm(cli)))
		return nouveau_abi16_put(abi16, -ENOSYS);

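	/* Look up the targeted channel among those created by this client. */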
	list_for_each_entry(chan16, &abi16->channels, head) {
		if (chan16->chan->chid == req->channel) {
			chan = chan16->chan;
			break;
		}
	}

	if (!chan)
		return nouveau_abi16_put(abi16, -ENOENT);

	if (unlikely(atomic_read(&chan->killed)))
		return nouveau_abi16_put(abi16, -ENODEV);

	if (!chan->dma.ib_max)
		return nouveau_abi16_put(abi16, -ENOSYS);

	push_max = nouveau_exec_push_max_from_ib_max(chan->dma.ib_max);
	if (unlikely(req->push_count > push_max)) {
		NV_PRINTK(err, cli, "pushbuf push count exceeds limit: %d max %d\n",
			  req->push_count, push_max);
		return nouveau_abi16_put(abi16, -EINVAL);
	}

	ret = nouveau_exec_ucopy(&args, req);
	if (ret)
		goto out;

	args.sched_entity = &chan16->sched_entity;
	args.file_priv = file_priv;
	args.chan = chan;

	ret = nouveau_exec(&args);
	if (ret)
		goto out_free_args;

out_free_args:
	nouveau_exec_ufree(&args);
out:
	return nouveau_abi16_put(abi16, ret);
}