drm/amdgpu: move entity selection and job init earlier during CS
drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
/*
 * Copyright 2008 Jerome Glisse.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *    Jerome Glisse <glisse@freedesktop.org>
 */

#include <linux/file.h>
#include <linux/pagemap.h>
#include <linux/sync_file.h>
#include <linux/dma-buf.h>

#include <drm/amdgpu_drm.h>
#include <drm/drm_syncobj.h>
#include "amdgpu_cs.h"
#include "amdgpu.h"
#include "amdgpu_trace.h"
#include "amdgpu_gmc.h"
#include "amdgpu_gem.h"
#include "amdgpu_ras.h"

static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p,
                                 struct amdgpu_device *adev,
                                 struct drm_file *filp,
                                 union drm_amdgpu_cs *cs)
{
        struct amdgpu_fpriv *fpriv = filp->driver_priv;

        if (cs->in.num_chunks == 0)
                return -EINVAL;

        memset(p, 0, sizeof(*p));
        p->adev = adev;
        p->filp = filp;

        p->ctx = amdgpu_ctx_get(fpriv, cs->in.ctx_id);
        if (!p->ctx)
                return -EINVAL;

        if (atomic_read(&p->ctx->guilty)) {
                amdgpu_ctx_put(p->ctx);
                return -ECANCELED;
        }
        return 0;
}

static int amdgpu_cs_p1_ib(struct amdgpu_cs_parser *p,
                           struct drm_amdgpu_cs_chunk_ib *chunk_ib,
                           unsigned int *num_ibs)
{
        struct drm_sched_entity *entity;
        int r;

        r = amdgpu_ctx_get_entity(p->ctx, chunk_ib->ip_type,
                                  chunk_ib->ip_instance,
                                  chunk_ib->ring, &entity);
        if (r)
                return r;

        /* Abort if there is no run queue associated with this entity.
         * Possibly because of disabled HW IP.
         */
        if (entity->rq == NULL)
                return -EINVAL;

        /* Currently we don't support submitting to multiple entities */
        if (p->entity && p->entity != entity)
                return -EINVAL;

        p->entity = entity;
        ++(*num_ibs);
        return 0;
}
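
/*
 * Because the scheduler entity is already chosen here in the first pass,
 * amdgpu_cs_pass1() can call drm_sched_job_init() right after the chunks
 * have been copied in, well before any buffer list handling takes place.
 */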

static int amdgpu_cs_p1_user_fence(struct amdgpu_cs_parser *p,
                                   struct drm_amdgpu_cs_chunk_fence *data,
                                   uint32_t *offset)
{
        struct drm_gem_object *gobj;
        struct amdgpu_bo *bo;
        unsigned long size;
        int r;

        gobj = drm_gem_object_lookup(p->filp, data->handle);
        if (gobj == NULL)
                return -EINVAL;

        bo = amdgpu_bo_ref(gem_to_amdgpu_bo(gobj));
        p->uf_entry.priority = 0;
        p->uf_entry.tv.bo = &bo->tbo;
        /* One for TTM and two for the CS job */
        p->uf_entry.tv.num_shared = 3;

        drm_gem_object_put(gobj);

        size = amdgpu_bo_size(bo);
        if (size != PAGE_SIZE || (data->offset + 8) > size) {
                r = -EINVAL;
                goto error_unref;
        }

        if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm)) {
                r = -EINVAL;
                goto error_unref;
        }

        *offset = data->offset;

        return 0;

error_unref:
        amdgpu_bo_unref(&bo);
        return r;
}

static int amdgpu_cs_p1_bo_handles(struct amdgpu_cs_parser *p,
                                   struct drm_amdgpu_bo_list_in *data)
{
        struct drm_amdgpu_bo_list_entry *info;
        int r;

        r = amdgpu_bo_create_list_entry_array(data, &info);
        if (r)
                return r;

        r = amdgpu_bo_list_create(p->adev, p->filp, info, data->bo_number,
                                  &p->bo_list);
        if (r)
                goto error_free;

        kvfree(info);
        return 0;

error_free:
        kvfree(info);

        return r;
}

/* Copy the data from userspace and go over it the first time */
static int amdgpu_cs_pass1(struct amdgpu_cs_parser *p,
                           union drm_amdgpu_cs *cs)
{
        struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
        struct amdgpu_vm *vm = &fpriv->vm;
        uint64_t *chunk_array_user;
        uint64_t *chunk_array;
        unsigned size, num_ibs = 0;
        uint32_t uf_offset = 0;
        int ret;
        int i;

        chunk_array = kvmalloc_array(cs->in.num_chunks, sizeof(uint64_t),
                                     GFP_KERNEL);
        if (!chunk_array)
                return -ENOMEM;

        /* get chunks */
        chunk_array_user = u64_to_user_ptr(cs->in.chunks);
        if (copy_from_user(chunk_array, chunk_array_user,
                           sizeof(uint64_t)*cs->in.num_chunks)) {
                ret = -EFAULT;
                goto free_chunk;
        }

        p->nchunks = cs->in.num_chunks;
        p->chunks = kvmalloc_array(p->nchunks, sizeof(struct amdgpu_cs_chunk),
                                   GFP_KERNEL);
        if (!p->chunks) {
                ret = -ENOMEM;
                goto free_chunk;
        }

        for (i = 0; i < p->nchunks; i++) {
                struct drm_amdgpu_cs_chunk __user **chunk_ptr = NULL;
                struct drm_amdgpu_cs_chunk user_chunk;
                uint32_t __user *cdata;

                chunk_ptr = u64_to_user_ptr(chunk_array[i]);
                if (copy_from_user(&user_chunk, chunk_ptr,
                                   sizeof(struct drm_amdgpu_cs_chunk))) {
                        ret = -EFAULT;
                        i--;
                        goto free_partial_kdata;
                }
                p->chunks[i].chunk_id = user_chunk.chunk_id;
                p->chunks[i].length_dw = user_chunk.length_dw;

                size = p->chunks[i].length_dw;
                cdata = u64_to_user_ptr(user_chunk.chunk_data);

                p->chunks[i].kdata = kvmalloc_array(size, sizeof(uint32_t),
                                                    GFP_KERNEL);
                if (p->chunks[i].kdata == NULL) {
                        ret = -ENOMEM;
                        i--;
                        goto free_partial_kdata;
                }
                size *= sizeof(uint32_t);
                if (copy_from_user(p->chunks[i].kdata, cdata, size)) {
                        ret = -EFAULT;
                        goto free_partial_kdata;
                }

                /* Assume the worst on the following checks */
                ret = -EINVAL;
                switch (p->chunks[i].chunk_id) {
                case AMDGPU_CHUNK_ID_IB:
                        if (size < sizeof(struct drm_amdgpu_cs_chunk_ib))
                                goto free_partial_kdata;

                        ret = amdgpu_cs_p1_ib(p, p->chunks[i].kdata, &num_ibs);
                        if (ret)
                                goto free_partial_kdata;
                        break;

                case AMDGPU_CHUNK_ID_FENCE:
                        if (size < sizeof(struct drm_amdgpu_cs_chunk_fence))
                                goto free_partial_kdata;

                        ret = amdgpu_cs_p1_user_fence(p, p->chunks[i].kdata,
                                                      &uf_offset);
                        if (ret)
                                goto free_partial_kdata;
                        break;

                case AMDGPU_CHUNK_ID_BO_HANDLES:
                        if (size < sizeof(struct drm_amdgpu_bo_list_in))
                                goto free_partial_kdata;

                        ret = amdgpu_cs_p1_bo_handles(p, p->chunks[i].kdata);
                        if (ret)
                                goto free_partial_kdata;
                        break;

                case AMDGPU_CHUNK_ID_DEPENDENCIES:
                case AMDGPU_CHUNK_ID_SYNCOBJ_IN:
                case AMDGPU_CHUNK_ID_SYNCOBJ_OUT:
                case AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES:
                case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_WAIT:
                case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_SIGNAL:
                        break;

                default:
                        goto free_partial_kdata;
                }
        }

        ret = amdgpu_job_alloc(p->adev, num_ibs, &p->job, vm);
        if (ret)
                goto free_all_kdata;

        ret = drm_sched_job_init(&p->job->base, p->entity, &fpriv->vm);
        if (ret)
                goto free_all_kdata;

        if (p->ctx->vram_lost_counter != p->job->vram_lost_counter) {
                ret = -ECANCELED;
                goto free_all_kdata;
        }

        if (p->uf_entry.tv.bo)
                p->job->uf_addr = uf_offset;
        kvfree(chunk_array);

        /* Use this opportunity to fill in task info for the vm */
        amdgpu_vm_set_task_info(vm);

        return 0;

free_all_kdata:
        i = p->nchunks - 1;
free_partial_kdata:
        for (; i >= 0; i--)
                kvfree(p->chunks[i].kdata);
        kvfree(p->chunks);
        p->chunks = NULL;
        p->nchunks = 0;
free_chunk:
        kvfree(chunk_array);

        return ret;
}
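
/*
 * Illustrative only (a sketch of the userspace side, not part of this file):
 * a minimal submission passes an array of chunk pointers, each chunk carrying
 * its id, payload size in dwords and payload address:
 *
 *	struct drm_amdgpu_cs_chunk_ib ib_info = {
 *		.ip_type  = AMDGPU_HW_IP_GFX,
 *		.va_start = ib_gpu_va,		(assumed GPU VA of the IB)
 *		.ib_bytes = ib_num_dw * 4,
 *	};
 *	struct drm_amdgpu_cs_chunk chunk = {
 *		.chunk_id   = AMDGPU_CHUNK_ID_IB,
 *		.length_dw  = sizeof(ib_info) / 4,
 *		.chunk_data = (uintptr_t)&ib_info,
 *	};
 *	uint64_t chunk_ptrs[1] = { (uintptr_t)&chunk };
 *
 * cs->in.chunks then points at chunk_ptrs and cs->in.num_chunks is 1, which
 * is exactly the layout amdgpu_cs_pass1() copies in above.
 */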

static int amdgpu_cs_p2_ib(struct amdgpu_cs_parser *p,
                           struct amdgpu_cs_chunk *chunk,
                           unsigned int *num_ibs,
                           unsigned int *ce_preempt,
                           unsigned int *de_preempt)
{
        struct drm_amdgpu_cs_chunk_ib *chunk_ib = chunk->kdata;
        struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
        struct amdgpu_ring *ring = amdgpu_job_ring(p->job);
        struct amdgpu_ib *ib = &p->job->ibs[*num_ibs];
        struct amdgpu_vm *vm = &fpriv->vm;
        int r;

        /* MM engine doesn't support user fences */
        if (p->uf_entry.tv.bo && ring->funcs->no_user_fence)
                return -EINVAL;

        if (chunk_ib->ip_type == AMDGPU_HW_IP_GFX &&
            chunk_ib->flags & AMDGPU_IB_FLAG_PREEMPT) {
                if (chunk_ib->flags & AMDGPU_IB_FLAG_CE)
                        (*ce_preempt)++;
                else
                        (*de_preempt)++;

                /* Each GFX command submission allows only one preemptible
                 * IB each for CE and DE.
                 */
                if (*ce_preempt > 1 || *de_preempt > 1)
                        return -EINVAL;
        }

        if (chunk_ib->flags & AMDGPU_IB_FLAG_PREAMBLE)
                p->job->preamble_status |= AMDGPU_PREAMBLE_IB_PRESENT;

        r = amdgpu_ib_get(p->adev, vm, ring->funcs->parse_cs ?
                          chunk_ib->ib_bytes : 0,
                          AMDGPU_IB_POOL_DELAYED, ib);
        if (r) {
                DRM_ERROR("Failed to get ib !\n");
                return r;
        }

        ib->gpu_addr = chunk_ib->va_start;
        ib->length_dw = chunk_ib->ib_bytes / 4;
        ib->flags = chunk_ib->flags;

        (*num_ibs)++;
        return 0;
}

static int amdgpu_cs_p2_dependencies(struct amdgpu_cs_parser *p,
                                     struct amdgpu_cs_chunk *chunk)
{
        struct drm_amdgpu_cs_chunk_dep *deps = chunk->kdata;
        struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
        unsigned num_deps;
        int i, r;

        num_deps = chunk->length_dw * 4 /
                sizeof(struct drm_amdgpu_cs_chunk_dep);

        for (i = 0; i < num_deps; ++i) {
                struct amdgpu_ctx *ctx;
                struct drm_sched_entity *entity;
                struct dma_fence *fence;

                ctx = amdgpu_ctx_get(fpriv, deps[i].ctx_id);
                if (ctx == NULL)
                        return -EINVAL;

                r = amdgpu_ctx_get_entity(ctx, deps[i].ip_type,
                                          deps[i].ip_instance,
                                          deps[i].ring, &entity);
                if (r) {
                        amdgpu_ctx_put(ctx);
                        return r;
                }

                fence = amdgpu_ctx_get_fence(ctx, entity, deps[i].handle);
                amdgpu_ctx_put(ctx);

                if (IS_ERR(fence))
                        return PTR_ERR(fence);
                else if (!fence)
                        continue;

                if (chunk->chunk_id == AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES) {
                        struct drm_sched_fence *s_fence;
                        struct dma_fence *old = fence;

                        s_fence = to_drm_sched_fence(fence);
                        fence = dma_fence_get(&s_fence->scheduled);
                        dma_fence_put(old);
                }

                r = amdgpu_sync_fence(&p->job->sync, fence);
                dma_fence_put(fence);
                if (r)
                        return r;
        }
        return 0;
}

static int amdgpu_syncobj_lookup_and_add(struct amdgpu_cs_parser *p,
                                         uint32_t handle, u64 point,
                                         u64 flags)
{
        struct dma_fence *fence;
        int r;

        r = drm_syncobj_find_fence(p->filp, handle, point, flags, &fence);
        if (r) {
                DRM_ERROR("syncobj %u failed to find fence @ %llu (%d)!\n",
                          handle, point, r);
                return r;
        }

        r = amdgpu_sync_fence(&p->job->sync, fence);
        dma_fence_put(fence);

        return r;
}

static int amdgpu_cs_p2_syncobj_in(struct amdgpu_cs_parser *p,
                                   struct amdgpu_cs_chunk *chunk)
{
        struct drm_amdgpu_cs_chunk_sem *deps = chunk->kdata;
        unsigned num_deps;
        int i, r;

        num_deps = chunk->length_dw * 4 /
                sizeof(struct drm_amdgpu_cs_chunk_sem);
        for (i = 0; i < num_deps; ++i) {
                r = amdgpu_syncobj_lookup_and_add(p, deps[i].handle, 0, 0);
                if (r)
                        return r;
        }

        return 0;
}

static int amdgpu_cs_p2_syncobj_timeline_wait(struct amdgpu_cs_parser *p,
                                              struct amdgpu_cs_chunk *chunk)
{
        struct drm_amdgpu_cs_chunk_syncobj *syncobj_deps = chunk->kdata;
        unsigned num_deps;
        int i, r;

        num_deps = chunk->length_dw * 4 /
                sizeof(struct drm_amdgpu_cs_chunk_syncobj);
        for (i = 0; i < num_deps; ++i) {
                r = amdgpu_syncobj_lookup_and_add(p, syncobj_deps[i].handle,
                                                  syncobj_deps[i].point,
                                                  syncobj_deps[i].flags);
                if (r)
                        return r;
        }

        return 0;
}

static int amdgpu_cs_p2_syncobj_out(struct amdgpu_cs_parser *p,
                                    struct amdgpu_cs_chunk *chunk)
{
        struct drm_amdgpu_cs_chunk_sem *deps = chunk->kdata;
        unsigned num_deps;
        int i;

        num_deps = chunk->length_dw * 4 /
                sizeof(struct drm_amdgpu_cs_chunk_sem);

        if (p->post_deps)
                return -EINVAL;

        p->post_deps = kmalloc_array(num_deps, sizeof(*p->post_deps),
                                     GFP_KERNEL);
        p->num_post_deps = 0;

        if (!p->post_deps)
                return -ENOMEM;

        for (i = 0; i < num_deps; ++i) {
                p->post_deps[i].syncobj =
                        drm_syncobj_find(p->filp, deps[i].handle);
                if (!p->post_deps[i].syncobj)
                        return -EINVAL;
                p->post_deps[i].chain = NULL;
                p->post_deps[i].point = 0;
                p->num_post_deps++;
        }

        return 0;
}

static int amdgpu_cs_p2_syncobj_timeline_signal(struct amdgpu_cs_parser *p,
                                                struct amdgpu_cs_chunk *chunk)
{
        struct drm_amdgpu_cs_chunk_syncobj *syncobj_deps = chunk->kdata;
        unsigned num_deps;
        int i;

        num_deps = chunk->length_dw * 4 /
                sizeof(struct drm_amdgpu_cs_chunk_syncobj);

        if (p->post_deps)
                return -EINVAL;

        p->post_deps = kmalloc_array(num_deps, sizeof(*p->post_deps),
                                     GFP_KERNEL);
        p->num_post_deps = 0;

        if (!p->post_deps)
                return -ENOMEM;

        for (i = 0; i < num_deps; ++i) {
                struct amdgpu_cs_post_dep *dep = &p->post_deps[i];

                dep->chain = NULL;
                if (syncobj_deps[i].point) {
                        dep->chain = dma_fence_chain_alloc();
                        if (!dep->chain)
                                return -ENOMEM;
                }

                dep->syncobj = drm_syncobj_find(p->filp,
                                                syncobj_deps[i].handle);
                if (!dep->syncobj) {
                        dma_fence_chain_free(dep->chain);
                        return -EINVAL;
                }
                dep->point = syncobj_deps[i].point;
                p->num_post_deps++;
        }

        return 0;
}

static int amdgpu_cs_pass2(struct amdgpu_cs_parser *p)
{
        unsigned int num_ibs = 0, ce_preempt = 0, de_preempt = 0;
        int i, r;

        for (i = 0; i < p->nchunks; ++i) {
                struct amdgpu_cs_chunk *chunk;

                chunk = &p->chunks[i];

                switch (chunk->chunk_id) {
                case AMDGPU_CHUNK_ID_IB:
                        r = amdgpu_cs_p2_ib(p, chunk, &num_ibs,
                                            &ce_preempt, &de_preempt);
                        if (r)
                                return r;
                        break;
                case AMDGPU_CHUNK_ID_DEPENDENCIES:
                case AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES:
                        r = amdgpu_cs_p2_dependencies(p, chunk);
                        if (r)
                                return r;
                        break;
                case AMDGPU_CHUNK_ID_SYNCOBJ_IN:
                        r = amdgpu_cs_p2_syncobj_in(p, chunk);
                        if (r)
                                return r;
                        break;
                case AMDGPU_CHUNK_ID_SYNCOBJ_OUT:
                        r = amdgpu_cs_p2_syncobj_out(p, chunk);
                        if (r)
                                return r;
                        break;
                case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_WAIT:
                        r = amdgpu_cs_p2_syncobj_timeline_wait(p, chunk);
                        if (r)
                                return r;
                        break;
                case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_SIGNAL:
                        r = amdgpu_cs_p2_syncobj_timeline_signal(p, chunk);
                        if (r)
                                return r;
                        break;
                }
        }

        return 0;
}

/* Convert microseconds to bytes. */
static u64 us_to_bytes(struct amdgpu_device *adev, s64 us)
{
        if (us <= 0 || !adev->mm_stats.log2_max_MBps)
                return 0;

        /* Since accum_us is incremented by a million per second, just
         * multiply it by the number of MB/s to get the number of bytes.
         */
        return us << adev->mm_stats.log2_max_MBps;
}

static s64 bytes_to_us(struct amdgpu_device *adev, u64 bytes)
{
        if (!adev->mm_stats.log2_max_MBps)
                return 0;

        return bytes >> adev->mm_stats.log2_max_MBps;
}
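
/*
 * Worked example (illustrative numbers, not taken from the code): with
 * log2_max_MBps == 6, i.e. roughly 64 MB/s of allowed traffic, one second
 * of accumulated budget (1000000 us) converts to 1000000 << 6 = 64000000
 * bytes, and reporting a 32 MiB move back turns into 33554432 >> 6 =
 * 524288 us of debt.
 */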

/* Returns how many bytes TTM can move right now. If no bytes can be moved,
 * it returns 0. If it returns non-zero, it's OK to move at least one buffer,
 * which means it can go over the threshold once. If that happens, the driver
 * will be in debt and no other buffer migrations can be done until that debt
 * is repaid.
 *
 * This approach allows moving a buffer of any size (it's important to allow
 * that).
 *
 * The currency is simply time in microseconds and it increases as the clock
 * ticks. The accumulated microseconds (us) are converted to bytes and
 * returned.
 */
static void amdgpu_cs_get_threshold_for_moves(struct amdgpu_device *adev,
                                              u64 *max_bytes,
                                              u64 *max_vis_bytes)
{
        s64 time_us, increment_us;
        u64 free_vram, total_vram, used_vram;
        /* Allow a maximum of 200 accumulated ms. This is basically per-IB
         * throttling.
         *
         * It means that in order to get full max MBps, at least 5 IBs per
         * second must be submitted and not more than 200ms apart from each
         * other.
         */
        const s64 us_upper_bound = 200000;

        if (!adev->mm_stats.log2_max_MBps) {
                *max_bytes = 0;
                *max_vis_bytes = 0;
                return;
        }

        total_vram = adev->gmc.real_vram_size - atomic64_read(&adev->vram_pin_size);
        used_vram = ttm_resource_manager_usage(&adev->mman.vram_mgr.manager);
        free_vram = used_vram >= total_vram ? 0 : total_vram - used_vram;

        spin_lock(&adev->mm_stats.lock);

        /* Increase the amount of accumulated us. */
        time_us = ktime_to_us(ktime_get());
        increment_us = time_us - adev->mm_stats.last_update_us;
        adev->mm_stats.last_update_us = time_us;
        adev->mm_stats.accum_us = min(adev->mm_stats.accum_us + increment_us,
                                      us_upper_bound);

        /* This prevents the short period of low performance when the VRAM
         * usage is low and the driver is in debt or doesn't have enough
         * accumulated us to fill VRAM quickly.
         *
         * The situation can occur in these cases:
         * - a lot of VRAM is freed by userspace
         * - the presence of a big buffer causes a lot of evictions
         *   (solution: split buffers into smaller ones)
         *
         * If 128 MB or 1/8th of VRAM is free, start filling it now by setting
         * accum_us to a positive number.
         */
        if (free_vram >= 128 * 1024 * 1024 || free_vram >= total_vram / 8) {
                s64 min_us;

                /* Be more aggressive on dGPUs. Try to fill a portion of free
                 * VRAM now.
                 */
                if (!(adev->flags & AMD_IS_APU))
                        min_us = bytes_to_us(adev, free_vram / 4);
                else
                        min_us = 0; /* Reset accum_us on APUs. */

                adev->mm_stats.accum_us = max(min_us, adev->mm_stats.accum_us);
        }

        /* This is set to 0 if the driver is in debt to disallow (optional)
         * buffer moves.
         */
        *max_bytes = us_to_bytes(adev, adev->mm_stats.accum_us);

        /* Do the same for visible VRAM if half of it is free */
        if (!amdgpu_gmc_vram_full_visible(&adev->gmc)) {
                u64 total_vis_vram = adev->gmc.visible_vram_size;
                u64 used_vis_vram =
                        amdgpu_vram_mgr_vis_usage(&adev->mman.vram_mgr);

                if (used_vis_vram < total_vis_vram) {
                        u64 free_vis_vram = total_vis_vram - used_vis_vram;
                        adev->mm_stats.accum_us_vis = min(adev->mm_stats.accum_us_vis +
                                                          increment_us, us_upper_bound);

                        if (free_vis_vram >= total_vis_vram / 2)
                                adev->mm_stats.accum_us_vis =
                                        max(bytes_to_us(adev, free_vis_vram / 2),
                                            adev->mm_stats.accum_us_vis);
                }

                *max_vis_bytes = us_to_bytes(adev, adev->mm_stats.accum_us_vis);
        } else {
                *max_vis_bytes = 0;
        }

        spin_unlock(&adev->mm_stats.lock);
}

/* Report how many bytes have really been moved for the last command
 * submission. This can result in a debt that can stop buffer migrations
 * temporarily.
 */
void amdgpu_cs_report_moved_bytes(struct amdgpu_device *adev, u64 num_bytes,
                                  u64 num_vis_bytes)
{
        spin_lock(&adev->mm_stats.lock);
        adev->mm_stats.accum_us -= bytes_to_us(adev, num_bytes);
        adev->mm_stats.accum_us_vis -= bytes_to_us(adev, num_vis_bytes);
        spin_unlock(&adev->mm_stats.lock);
}

static int amdgpu_cs_bo_validate(void *param, struct amdgpu_bo *bo)
{
        struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
        struct amdgpu_cs_parser *p = param;
        struct ttm_operation_ctx ctx = {
                .interruptible = true,
                .no_wait_gpu = false,
                .resv = bo->tbo.base.resv
        };
        uint32_t domain;
        int r;

        if (bo->tbo.pin_count)
                return 0;

        /* Don't move this buffer if we have depleted our allowance
         * to move it. Don't move anything if the threshold is zero.
         */
        if (p->bytes_moved < p->bytes_moved_threshold &&
            (!bo->tbo.base.dma_buf ||
            list_empty(&bo->tbo.base.dma_buf->attachments))) {
                if (!amdgpu_gmc_vram_full_visible(&adev->gmc) &&
                    (bo->flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED)) {
                        /* And don't move a CPU_ACCESS_REQUIRED BO to limited
                         * visible VRAM if we've depleted our allowance to do
                         * that.
                         */
                        if (p->bytes_moved_vis < p->bytes_moved_vis_threshold)
                                domain = bo->preferred_domains;
                        else
                                domain = bo->allowed_domains;
                } else {
                        domain = bo->preferred_domains;
                }
        } else {
                domain = bo->allowed_domains;
        }

retry:
        amdgpu_bo_placement_from_domain(bo, domain);
        r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);

        p->bytes_moved += ctx.bytes_moved;
        if (!amdgpu_gmc_vram_full_visible(&adev->gmc) &&
            amdgpu_bo_in_cpu_visible_vram(bo))
                p->bytes_moved_vis += ctx.bytes_moved;

        if (unlikely(r == -ENOMEM) && domain != bo->allowed_domains) {
                domain = bo->allowed_domains;
                goto retry;
        }

        return r;
}

static int amdgpu_cs_list_validate(struct amdgpu_cs_parser *p,
                                   struct list_head *validated)
{
        struct ttm_operation_ctx ctx = { true, false };
        struct amdgpu_bo_list_entry *lobj;
        int r;

        list_for_each_entry(lobj, validated, tv.head) {
                struct amdgpu_bo *bo = ttm_to_amdgpu_bo(lobj->tv.bo);
                struct mm_struct *usermm;

                usermm = amdgpu_ttm_tt_get_usermm(bo->tbo.ttm);
                if (usermm && usermm != current->mm)
                        return -EPERM;

                if (amdgpu_ttm_tt_is_userptr(bo->tbo.ttm) &&
                    lobj->user_invalidated && lobj->user_pages) {
                        amdgpu_bo_placement_from_domain(bo,
                                                        AMDGPU_GEM_DOMAIN_CPU);
                        r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
                        if (r)
                                return r;

                        amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm,
                                                     lobj->user_pages);
                }

                r = amdgpu_cs_bo_validate(p, bo);
                if (r)
                        return r;

                kvfree(lobj->user_pages);
                lobj->user_pages = NULL;
        }
        return 0;
}

static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
                                union drm_amdgpu_cs *cs)
{
        struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
        struct amdgpu_vm *vm = &fpriv->vm;
        struct amdgpu_bo_list_entry *e;
        struct list_head duplicates;
        int r;

        INIT_LIST_HEAD(&p->validated);

        /* p->bo_list could already be assigned if AMDGPU_CHUNK_ID_BO_HANDLES is present */
        if (cs->in.bo_list_handle) {
                if (p->bo_list)
                        return -EINVAL;

                r = amdgpu_bo_list_get(fpriv, cs->in.bo_list_handle,
                                       &p->bo_list);
                if (r)
                        return r;
        } else if (!p->bo_list) {
                /* Create an empty bo_list when no handle is provided */
                r = amdgpu_bo_list_create(p->adev, p->filp, NULL, 0,
                                          &p->bo_list);
                if (r)
                        return r;
        }

        mutex_lock(&p->bo_list->bo_list_mutex);

        /* One for TTM and one for the CS job */
        amdgpu_bo_list_for_each_entry(e, p->bo_list)
                e->tv.num_shared = 2;

        amdgpu_bo_list_get_list(p->bo_list, &p->validated);

        INIT_LIST_HEAD(&duplicates);
        amdgpu_vm_get_pd_bo(&fpriv->vm, &p->validated, &p->vm_pd);

        if (p->uf_entry.tv.bo && !ttm_to_amdgpu_bo(p->uf_entry.tv.bo)->parent)
                list_add(&p->uf_entry.tv.head, &p->validated);

        /* Get userptr backing pages. If pages are updated after registered
         * in amdgpu_gem_userptr_ioctl(), amdgpu_cs_list_validate() will do
         * amdgpu_ttm_backend_bind() to flush and invalidate new pages
         */
        amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
                struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
                bool userpage_invalidated = false;
                int i;

                e->user_pages = kvmalloc_array(bo->tbo.ttm->num_pages,
                                               sizeof(struct page *),
                                               GFP_KERNEL | __GFP_ZERO);
                if (!e->user_pages) {
                        DRM_ERROR("kvmalloc_array failure\n");
                        r = -ENOMEM;
                        goto out_free_user_pages;
                }

                r = amdgpu_ttm_tt_get_user_pages(bo, e->user_pages);
                if (r) {
                        kvfree(e->user_pages);
                        e->user_pages = NULL;
                        goto out_free_user_pages;
                }

                for (i = 0; i < bo->tbo.ttm->num_pages; i++) {
                        if (bo->tbo.ttm->pages[i] != e->user_pages[i]) {
                                userpage_invalidated = true;
                                break;
                        }
                }
                e->user_invalidated = userpage_invalidated;
        }

        r = ttm_eu_reserve_buffers(&p->ticket, &p->validated, true,
                                   &duplicates);
        if (unlikely(r != 0)) {
                if (r != -ERESTARTSYS)
                        DRM_ERROR("ttm_eu_reserve_buffers failed.\n");
                goto out_free_user_pages;
        }

        amdgpu_bo_list_for_each_entry(e, p->bo_list) {
                struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);

                e->bo_va = amdgpu_vm_bo_find(vm, bo);
        }

        /* The fence waiting is done after taking the reservation lock of the
         * PD root, so no ctx mutex lock is needed.
         */
        r = amdgpu_ctx_wait_prev_fence(p->ctx, p->entity);
        if (unlikely(r != 0)) {
                if (r != -ERESTARTSYS)
                        DRM_ERROR("amdgpu_ctx_wait_prev_fence failed.\n");
                goto error_validate;
        }

        amdgpu_cs_get_threshold_for_moves(p->adev, &p->bytes_moved_threshold,
                                          &p->bytes_moved_vis_threshold);
        p->bytes_moved = 0;
        p->bytes_moved_vis = 0;

        r = amdgpu_vm_validate_pt_bos(p->adev, &fpriv->vm,
                                      amdgpu_cs_bo_validate, p);
        if (r) {
                DRM_ERROR("amdgpu_vm_validate_pt_bos() failed.\n");
                goto error_validate;
        }

        r = amdgpu_cs_list_validate(p, &duplicates);
        if (r)
                goto error_validate;

        r = amdgpu_cs_list_validate(p, &p->validated);
        if (r)
                goto error_validate;

        if (p->uf_entry.tv.bo) {
                struct amdgpu_bo *uf = ttm_to_amdgpu_bo(p->uf_entry.tv.bo);

                r = amdgpu_ttm_alloc_gart(&uf->tbo);
                if (r)
                        goto error_validate;

                p->job->uf_addr += amdgpu_bo_gpu_offset(uf);
        }

        amdgpu_cs_report_moved_bytes(p->adev, p->bytes_moved,
                                     p->bytes_moved_vis);
        amdgpu_job_set_resources(p->job, p->bo_list->gds_obj,
                                 p->bo_list->gws_obj, p->bo_list->oa_obj);
        return 0;

error_validate:
        ttm_eu_backoff_reservation(&p->ticket, &p->validated);

out_free_user_pages:
        amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
                struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);

                if (!e->user_pages)
                        continue;
                amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm);
                kvfree(e->user_pages);
                e->user_pages = NULL;
        }
        return r;
}
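
/*
 * Summary of the userptr handling above (no new behaviour): the backing
 * pages are looked up without holding any lock, compared against what the
 * TTM object currently has, and the entry is only marked user_invalidated
 * when they differ. amdgpu_cs_list_validate() then rebinds such BOs, and
 * amdgpu_cs_submit() re-checks the pages under adev->notifier_lock,
 * returning -EAGAIN so that userspace (drmIoctl in libdrm) restarts the
 * submission if they changed again in between.
 */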

static void trace_amdgpu_cs_ibs(struct amdgpu_cs_parser *parser)
{
        int i;

        if (!trace_amdgpu_cs_enabled())
                return;

        for (i = 0; i < parser->job->num_ibs; i++)
                trace_amdgpu_cs(parser, i);
}

static int amdgpu_cs_patch_ibs(struct amdgpu_cs_parser *p)
{
        struct amdgpu_job *job = p->job;
        struct amdgpu_ring *ring = amdgpu_job_ring(job);
        unsigned int i;
        int r;

        /* Only for UVD/VCE VM emulation */
        if (!ring->funcs->parse_cs && !ring->funcs->patch_cs_in_place)
                return 0;

        for (i = 0; i < job->num_ibs; ++i) {
                struct amdgpu_ib *ib = &job->ibs[i];
                struct amdgpu_bo_va_mapping *m;
                struct amdgpu_bo *aobj;
                uint64_t va_start;
                uint8_t *kptr;

                va_start = ib->gpu_addr & AMDGPU_GMC_HOLE_MASK;
                r = amdgpu_cs_find_mapping(p, va_start, &aobj, &m);
                if (r) {
                        DRM_ERROR("IB va_start is invalid\n");
                        return r;
                }

                if ((va_start + ib->length_dw * 4) >
                    (m->last + 1) * AMDGPU_GPU_PAGE_SIZE) {
                        DRM_ERROR("IB va_start+ib_bytes is invalid\n");
                        return -EINVAL;
                }

                /* the IB should be reserved at this point */
                r = amdgpu_bo_kmap(aobj, (void **)&kptr);
                if (r)
                        return r;

                kptr += va_start - (m->start * AMDGPU_GPU_PAGE_SIZE);

                if (ring->funcs->parse_cs) {
                        memcpy(ib->ptr, kptr, ib->length_dw * 4);
                        amdgpu_bo_kunmap(aobj);

                        r = amdgpu_ring_parse_cs(ring, p, p->job, ib);
                        if (r)
                                return r;
                } else {
                        ib->ptr = (uint32_t *)kptr;
                        r = amdgpu_ring_patch_cs_in_place(ring, p, p->job, ib);
                        amdgpu_bo_kunmap(aobj);
                        if (r)
                                return r;
                }
        }

        return 0;
}

static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p)
{
        struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
        struct amdgpu_device *adev = p->adev;
        struct amdgpu_vm *vm = &fpriv->vm;
        struct amdgpu_bo_list_entry *e;
        struct amdgpu_bo_va *bo_va;
        struct amdgpu_bo *bo;
        int r;

        if (!p->job->vm)
                return 0;

        r = amdgpu_vm_clear_freed(adev, vm, NULL);
        if (r)
                return r;

        r = amdgpu_vm_bo_update(adev, fpriv->prt_va, false);
        if (r)
                return r;

        r = amdgpu_sync_fence(&p->job->sync, fpriv->prt_va->last_pt_update);
        if (r)
                return r;

        if (fpriv->csa_va) {
                bo_va = fpriv->csa_va;
                BUG_ON(!bo_va);
                r = amdgpu_vm_bo_update(adev, bo_va, false);
                if (r)
                        return r;

                r = amdgpu_sync_fence(&p->job->sync, bo_va->last_pt_update);
                if (r)
                        return r;
        }

        amdgpu_bo_list_for_each_entry(e, p->bo_list) {
                /* ignore duplicates */
                bo = ttm_to_amdgpu_bo(e->tv.bo);
                if (!bo)
                        continue;

                bo_va = e->bo_va;
                if (bo_va == NULL)
                        continue;

                r = amdgpu_vm_bo_update(adev, bo_va, false);
                if (r)
                        return r;

                r = amdgpu_sync_fence(&p->job->sync, bo_va->last_pt_update);
                if (r)
                        return r;
        }

        r = amdgpu_vm_handle_moved(adev, vm);
        if (r)
                return r;

        r = amdgpu_vm_update_pdes(adev, vm, false);
        if (r)
                return r;

        r = amdgpu_sync_fence(&p->job->sync, vm->last_update);
        if (r)
                return r;

        p->job->vm_pd_addr = amdgpu_gmc_pd_addr(vm->root.bo);

        if (amdgpu_vm_debug) {
                /* Invalidate all BOs to test for userspace bugs */
                amdgpu_bo_list_for_each_entry(e, p->bo_list) {
                        struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);

                        /* ignore duplicates */
                        if (!bo)
                                continue;

                        amdgpu_vm_bo_invalidate(adev, bo, false);
                }
        }

        return 0;
}
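
/*
 * Summary of the ordering above: freed mappings are cleared first, then the
 * PRT, CSA and per-BO virtual addresses are updated, moved BOs are handled
 * and only then are the page directories brought up to date. Every step adds
 * its last_pt_update/last_update fence to p->job->sync, so the IB can only
 * run once the page tables it uses are valid.
 */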

static int amdgpu_cs_sync_rings(struct amdgpu_cs_parser *p)
{
        struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
        struct amdgpu_bo_list_entry *e;
        int r;

        list_for_each_entry(e, &p->validated, tv.head) {
                struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
                struct dma_resv *resv = bo->tbo.base.resv;
                enum amdgpu_sync_mode sync_mode;

                sync_mode = amdgpu_bo_explicit_sync(bo) ?
                        AMDGPU_SYNC_EXPLICIT : AMDGPU_SYNC_NE_OWNER;
                r = amdgpu_sync_resv(p->adev, &p->job->sync, resv, sync_mode,
                                     &fpriv->vm);
                if (r)
                        return r;
        }
        return 0;
}

static void amdgpu_cs_post_dependencies(struct amdgpu_cs_parser *p)
{
        int i;

        for (i = 0; i < p->num_post_deps; ++i) {
                if (p->post_deps[i].chain && p->post_deps[i].point) {
                        drm_syncobj_add_point(p->post_deps[i].syncobj,
                                              p->post_deps[i].chain,
                                              p->fence, p->post_deps[i].point);
                        p->post_deps[i].chain = NULL;
                } else {
                        drm_syncobj_replace_fence(p->post_deps[i].syncobj,
                                                  p->fence);
                }
        }
}

static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
                            union drm_amdgpu_cs *cs)
{
        struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
        struct drm_sched_entity *entity = p->entity;
        struct amdgpu_bo_list_entry *e;
        struct amdgpu_job *job;
        uint64_t seq;
        int r = 0;

        job = p->job;
        p->job = NULL;

        drm_sched_job_arm(&job->base);

        /* No memory allocation is allowed while holding the notifier lock.
         * The lock is held until amdgpu_cs_submit is finished and fence is
         * added to BOs.
         */
        mutex_lock(&p->adev->notifier_lock);

        /* If userptr are invalidated after amdgpu_cs_parser_bos(), return
         * -EAGAIN, drmIoctl in libdrm will restart the amdgpu_cs_ioctl.
         */
        amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
                struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);

                r |= !amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm);
        }
        if (r) {
                r = -EAGAIN;
                goto error_abort;
        }

        p->fence = dma_fence_get(&job->base.s_fence->finished);

        seq = amdgpu_ctx_add_fence(p->ctx, entity, p->fence);
        amdgpu_cs_post_dependencies(p);

        if ((job->preamble_status & AMDGPU_PREAMBLE_IB_PRESENT) &&
            !p->ctx->preamble_presented) {
                job->preamble_status |= AMDGPU_PREAMBLE_IB_PRESENT_FIRST;
                p->ctx->preamble_presented = true;
        }

        cs->out.handle = seq;
        job->uf_sequence = seq;

        amdgpu_job_free_resources(job);

        trace_amdgpu_cs_ioctl(job);
        amdgpu_vm_bo_trace_cs(&fpriv->vm, &p->ticket);
        drm_sched_entity_push_job(&job->base);

        amdgpu_vm_move_to_lru_tail(p->adev, &fpriv->vm);

        /* Make sure all BOs are remembered as writers */
        amdgpu_bo_list_for_each_entry(e, p->bo_list)
                e->tv.num_shared = 0;

        ttm_eu_fence_buffer_objects(&p->ticket, &p->validated, p->fence);
        mutex_unlock(&p->adev->notifier_lock);
        mutex_unlock(&p->bo_list->bo_list_mutex);

        return 0;

error_abort:
        drm_sched_job_cleanup(&job->base);
        mutex_unlock(&p->adev->notifier_lock);
        amdgpu_job_free(job);
        return r;
}

/* Cleanup the parser structure */
static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser)
{
        unsigned i;

        for (i = 0; i < parser->num_post_deps; i++) {
                drm_syncobj_put(parser->post_deps[i].syncobj);
                kfree(parser->post_deps[i].chain);
        }
        kfree(parser->post_deps);

        dma_fence_put(parser->fence);

        if (parser->ctx)
                amdgpu_ctx_put(parser->ctx);
        if (parser->bo_list)
                amdgpu_bo_list_put(parser->bo_list);

        for (i = 0; i < parser->nchunks; i++)
                kvfree(parser->chunks[i].kdata);
        kvfree(parser->chunks);
        if (parser->job)
                amdgpu_job_free(parser->job);
        if (parser->uf_entry.tv.bo) {
                struct amdgpu_bo *uf = ttm_to_amdgpu_bo(parser->uf_entry.tv.bo);

                amdgpu_bo_unref(&uf);
        }
}

int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
{
        struct amdgpu_device *adev = drm_to_adev(dev);
        struct amdgpu_cs_parser parser;
        int r;

        if (amdgpu_ras_intr_triggered())
                return -EHWPOISON;

        if (!adev->accel_working)
                return -EBUSY;

        r = amdgpu_cs_parser_init(&parser, adev, filp, data);
        if (r) {
                if (printk_ratelimit())
                        DRM_ERROR("Failed to initialize parser %d!\n", r);
                return r;
        }

        r = amdgpu_cs_pass1(&parser, data);
        if (r)
                goto error_fini;

        r = amdgpu_cs_pass2(&parser);
        if (r)
                goto error_fini;

        r = amdgpu_cs_parser_bos(&parser, data);
        if (r) {
                if (r == -ENOMEM)
                        DRM_ERROR("Not enough memory for command submission!\n");
                else if (r != -ERESTARTSYS && r != -EAGAIN)
                        DRM_ERROR("Failed to process the buffer list %d!\n", r);
                goto error_fini;
        }

        r = amdgpu_cs_patch_ibs(&parser);
        if (r)
                goto error_backoff;

        r = amdgpu_cs_vm_handling(&parser);
        if (r)
                goto error_backoff;

        r = amdgpu_cs_sync_rings(&parser);
        if (r)
                goto error_backoff;

        trace_amdgpu_cs_ibs(&parser);

        r = amdgpu_cs_submit(&parser, data);
        if (r)
                goto error_backoff;

        amdgpu_cs_parser_fini(&parser);
        return 0;

error_backoff:
        ttm_eu_backoff_reservation(&parser.ticket, &parser.validated);
        mutex_unlock(&parser.bo_list->bo_list_mutex);

error_fini:
        amdgpu_cs_parser_fini(&parser);
        return r;
}

/**
 * amdgpu_cs_wait_ioctl - wait for a command submission to finish
 *
 * @dev: drm device
 * @data: data from userspace
 * @filp: file private
 *
 * Wait for the command submission identified by handle to finish.
 */
int amdgpu_cs_wait_ioctl(struct drm_device *dev, void *data,
                         struct drm_file *filp)
{
        union drm_amdgpu_wait_cs *wait = data;
        unsigned long timeout = amdgpu_gem_timeout(wait->in.timeout);
        struct drm_sched_entity *entity;
        struct amdgpu_ctx *ctx;
        struct dma_fence *fence;
        long r;

        ctx = amdgpu_ctx_get(filp->driver_priv, wait->in.ctx_id);
        if (ctx == NULL)
                return -EINVAL;

        r = amdgpu_ctx_get_entity(ctx, wait->in.ip_type, wait->in.ip_instance,
                                  wait->in.ring, &entity);
        if (r) {
                amdgpu_ctx_put(ctx);
                return r;
        }

        fence = amdgpu_ctx_get_fence(ctx, entity, wait->in.handle);
        if (IS_ERR(fence))
                r = PTR_ERR(fence);
        else if (fence) {
                r = dma_fence_wait_timeout(fence, true, timeout);
                if (r > 0 && fence->error)
                        r = fence->error;
                dma_fence_put(fence);
        } else
                r = 1;

        amdgpu_ctx_put(ctx);
        if (r < 0)
                return r;

        memset(wait, 0, sizeof(*wait));
        wait->out.status = (r == 0);

        return 0;
}

/**
 * amdgpu_cs_get_fence - helper to get fence from drm_amdgpu_fence
 *
 * @adev: amdgpu device
 * @filp: file private
 * @user: drm_amdgpu_fence copied from user space
 */
static struct dma_fence *amdgpu_cs_get_fence(struct amdgpu_device *adev,
                                             struct drm_file *filp,
                                             struct drm_amdgpu_fence *user)
{
        struct drm_sched_entity *entity;
        struct amdgpu_ctx *ctx;
        struct dma_fence *fence;
        int r;

        ctx = amdgpu_ctx_get(filp->driver_priv, user->ctx_id);
        if (ctx == NULL)
                return ERR_PTR(-EINVAL);

        r = amdgpu_ctx_get_entity(ctx, user->ip_type, user->ip_instance,
                                  user->ring, &entity);
        if (r) {
                amdgpu_ctx_put(ctx);
                return ERR_PTR(r);
        }

        fence = amdgpu_ctx_get_fence(ctx, entity, user->seq_no);
        amdgpu_ctx_put(ctx);

        return fence;
}

int amdgpu_cs_fence_to_handle_ioctl(struct drm_device *dev, void *data,
                                    struct drm_file *filp)
{
        struct amdgpu_device *adev = drm_to_adev(dev);
        union drm_amdgpu_fence_to_handle *info = data;
        struct dma_fence *fence;
        struct drm_syncobj *syncobj;
        struct sync_file *sync_file;
        int fd, r;

        fence = amdgpu_cs_get_fence(adev, filp, &info->in.fence);
        if (IS_ERR(fence))
                return PTR_ERR(fence);

        if (!fence)
                fence = dma_fence_get_stub();

        switch (info->in.what) {
        case AMDGPU_FENCE_TO_HANDLE_GET_SYNCOBJ:
                r = drm_syncobj_create(&syncobj, 0, fence);
                dma_fence_put(fence);
                if (r)
                        return r;
                r = drm_syncobj_get_handle(filp, syncobj, &info->out.handle);
                drm_syncobj_put(syncobj);
                return r;

        case AMDGPU_FENCE_TO_HANDLE_GET_SYNCOBJ_FD:
                r = drm_syncobj_create(&syncobj, 0, fence);
                dma_fence_put(fence);
                if (r)
                        return r;
                r = drm_syncobj_get_fd(syncobj, (int *)&info->out.handle);
                drm_syncobj_put(syncobj);
                return r;

        case AMDGPU_FENCE_TO_HANDLE_GET_SYNC_FILE_FD:
                fd = get_unused_fd_flags(O_CLOEXEC);
                if (fd < 0) {
                        dma_fence_put(fence);
                        return fd;
                }

                sync_file = sync_file_create(fence);
                dma_fence_put(fence);
                if (!sync_file) {
                        put_unused_fd(fd);
                        return -ENOMEM;
                }

                fd_install(fd, sync_file->file);
                info->out.handle = fd;
                return 0;

        default:
                dma_fence_put(fence);
                return -EINVAL;
        }
}
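
/*
 * Illustrative userspace flow (a sketch under assumptions, not part of this
 * file): the sequence number returned by amdgpu_cs_ioctl() can be turned
 * into a pollable fd through the ioctl above, e.g. with a libdrm-style
 * wrapper:
 *
 *	union drm_amdgpu_fence_to_handle args = { 0 };
 *
 *	args.in.fence.ctx_id = ctx_id;
 *	args.in.fence.ip_type = AMDGPU_HW_IP_GFX;
 *	args.in.fence.seq_no = cs_out_handle;
 *	args.in.what = AMDGPU_FENCE_TO_HANDLE_GET_SYNC_FILE_FD;
 *	drmCommandWriteRead(drm_fd, DRM_AMDGPU_FENCE_TO_HANDLE,
 *			    &args, sizeof(args));
 *	poll() on args.out.handle, which now holds a sync_file fd
 *
 * The wrapper (drmCommandWriteRead) and the ioctl name are assumptions
 * about libdrm here; only the union layout comes from this driver.
 */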

/**
 * amdgpu_cs_wait_all_fences - wait on all fences to signal
 *
 * @adev: amdgpu device
 * @filp: file private
 * @wait: wait parameters
 * @fences: array of drm_amdgpu_fence
 */
static int amdgpu_cs_wait_all_fences(struct amdgpu_device *adev,
                                     struct drm_file *filp,
                                     union drm_amdgpu_wait_fences *wait,
                                     struct drm_amdgpu_fence *fences)
{
        uint32_t fence_count = wait->in.fence_count;
        unsigned int i;
        long r = 1;

        for (i = 0; i < fence_count; i++) {
                struct dma_fence *fence;
                unsigned long timeout = amdgpu_gem_timeout(wait->in.timeout_ns);

                fence = amdgpu_cs_get_fence(adev, filp, &fences[i]);
                if (IS_ERR(fence))
                        return PTR_ERR(fence);
                else if (!fence)
                        continue;

                r = dma_fence_wait_timeout(fence, true, timeout);
                dma_fence_put(fence);
                if (r < 0)
                        return r;

                if (r == 0)
                        break;

                if (fence->error)
                        return fence->error;
        }

        memset(wait, 0, sizeof(*wait));
        wait->out.status = (r > 0);

        return 0;
}

/**
 * amdgpu_cs_wait_any_fence - wait on any fence to signal
 *
 * @adev: amdgpu device
 * @filp: file private
 * @wait: wait parameters
 * @fences: array of drm_amdgpu_fence
 */
static int amdgpu_cs_wait_any_fence(struct amdgpu_device *adev,
                                    struct drm_file *filp,
                                    union drm_amdgpu_wait_fences *wait,
                                    struct drm_amdgpu_fence *fences)
{
        unsigned long timeout = amdgpu_gem_timeout(wait->in.timeout_ns);
        uint32_t fence_count = wait->in.fence_count;
        uint32_t first = ~0;
        struct dma_fence **array;
        unsigned int i;
        long r;

        /* Prepare the fence array */
        array = kcalloc(fence_count, sizeof(struct dma_fence *), GFP_KERNEL);

        if (array == NULL)
                return -ENOMEM;

        for (i = 0; i < fence_count; i++) {
                struct dma_fence *fence;

                fence = amdgpu_cs_get_fence(adev, filp, &fences[i]);
                if (IS_ERR(fence)) {
                        r = PTR_ERR(fence);
                        goto err_free_fence_array;
                } else if (fence) {
                        array[i] = fence;
                } else { /* NULL, the fence has been already signaled */
                        r = 1;
                        first = i;
                        goto out;
                }
        }

        r = dma_fence_wait_any_timeout(array, fence_count, true, timeout,
                                       &first);
        if (r < 0)
                goto err_free_fence_array;

out:
        memset(wait, 0, sizeof(*wait));
        wait->out.status = (r > 0);
        wait->out.first_signaled = first;

        if (first < fence_count && array[first])
                r = array[first]->error;
        else
                r = 0;

err_free_fence_array:
        for (i = 0; i < fence_count; i++)
                dma_fence_put(array[i]);
        kfree(array);

        return r;
}

/**
 * amdgpu_cs_wait_fences_ioctl - wait for multiple command submissions to finish
 *
 * @dev: drm device
 * @data: data from userspace
 * @filp: file private
 */
int amdgpu_cs_wait_fences_ioctl(struct drm_device *dev, void *data,
                                struct drm_file *filp)
{
        struct amdgpu_device *adev = drm_to_adev(dev);
        union drm_amdgpu_wait_fences *wait = data;
        uint32_t fence_count = wait->in.fence_count;
        struct drm_amdgpu_fence *fences_user;
        struct drm_amdgpu_fence *fences;
        int r;

        /* Get the fences from userspace */
        fences = kmalloc_array(fence_count, sizeof(struct drm_amdgpu_fence),
                               GFP_KERNEL);
        if (fences == NULL)
                return -ENOMEM;

        fences_user = u64_to_user_ptr(wait->in.fences);
        if (copy_from_user(fences, fences_user,
                           sizeof(struct drm_amdgpu_fence) * fence_count)) {
                r = -EFAULT;
                goto err_free_fences;
        }

        if (wait->in.wait_all)
                r = amdgpu_cs_wait_all_fences(adev, filp, wait, fences);
        else
                r = amdgpu_cs_wait_any_fence(adev, filp, wait, fences);

err_free_fences:
        kfree(fences);

        return r;
}

/**
 * amdgpu_cs_find_mapping - find bo_va for VM address
 *
 * @parser: command submission parser context
 * @addr: VM address
 * @bo: resulting BO of the mapping found
 * @map: Placeholder to return found BO mapping
 *
 * Search the buffer objects in the command submission context for a certain
 * virtual memory address. Returns allocation structure when found, NULL
 * otherwise.
 */
int amdgpu_cs_find_mapping(struct amdgpu_cs_parser *parser,
                           uint64_t addr, struct amdgpu_bo **bo,
                           struct amdgpu_bo_va_mapping **map)
{
        struct amdgpu_fpriv *fpriv = parser->filp->driver_priv;
        struct ttm_operation_ctx ctx = { false, false };
        struct amdgpu_vm *vm = &fpriv->vm;
        struct amdgpu_bo_va_mapping *mapping;
        int r;

        addr /= AMDGPU_GPU_PAGE_SIZE;

        mapping = amdgpu_vm_bo_lookup_mapping(vm, addr);
        if (!mapping || !mapping->bo_va || !mapping->bo_va->base.bo)
                return -EINVAL;

        *bo = mapping->bo_va->base.bo;
        *map = mapping;

        /* Double check that the BO is reserved by this CS */
        if (dma_resv_locking_ctx((*bo)->tbo.base.resv) != &parser->ticket)
                return -EINVAL;

        if (!((*bo)->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS)) {
                (*bo)->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
                amdgpu_bo_placement_from_domain(*bo, (*bo)->allowed_domains);
                r = ttm_bo_validate(&(*bo)->tbo, &(*bo)->placement, &ctx);
                if (r)
                        return r;
        }

        return amdgpu_ttm_alloc_gart(&(*bo)->tbo);
}
c855e250 1672}