drivers/gpu/drm/radeon/radeon_cs.c
/*
 * Copyright 2008 Jerome Glisse.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *    Jerome Glisse <glisse@freedesktop.org>
 */
#include <linux/list_sort.h>
#include <drm/drmP.h>
#include <drm/radeon_drm.h>
#include "radeon_reg.h"
#include "radeon.h"
#include "radeon_trace.h"

#define RADEON_CS_MAX_PRIORITY		32u
#define RADEON_CS_NUM_BUCKETS		(RADEON_CS_MAX_PRIORITY + 1)

/* This is based on the bucket sort with O(n) time complexity.
 * An item with priority "i" is added to bucket[i]. The lists are then
 * concatenated in descending order.
 */
struct radeon_cs_buckets {
	struct list_head bucket[RADEON_CS_NUM_BUCKETS];
};

static void radeon_cs_buckets_init(struct radeon_cs_buckets *b)
{
	unsigned i;

	for (i = 0; i < RADEON_CS_NUM_BUCKETS; i++)
		INIT_LIST_HEAD(&b->bucket[i]);
}

static void radeon_cs_buckets_add(struct radeon_cs_buckets *b,
				  struct list_head *item, unsigned priority)
{
	/* Since buffers which appear sooner in the relocation list are
	 * likely to be used more often than buffers which appear later
	 * in the list, the sort mustn't change the ordering of buffers
	 * with the same priority, i.e. it must be stable.
	 */
	list_add_tail(item, &b->bucket[min(priority, RADEON_CS_MAX_PRIORITY)]);
}

static void radeon_cs_buckets_get_list(struct radeon_cs_buckets *b,
				       struct list_head *out_list)
{
	unsigned i;

	/* Connect the sorted buckets in the output list. */
	for (i = 0; i < RADEON_CS_NUM_BUCKETS; i++) {
		list_splice(&b->bucket[i], out_list);
	}
}

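/**
 * radeon_cs_parser_relocs() - build and validate the buffer list for a CS
 * @p: parser structure holding parsing context.
 *
 * Looks up every GEM object referenced by the relocation chunk, assigns
 * placement domains and a priority to each buffer, sorts the buffers into
 * p->validated with the bucket sort above and validates them for the
 * submission.
 **/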
static int radeon_cs_parser_relocs(struct radeon_cs_parser *p)
{
	struct drm_device *ddev = p->rdev->ddev;
	struct radeon_cs_chunk *chunk;
	struct radeon_cs_buckets buckets;
	unsigned i, j;
	bool duplicate, need_mmap_lock = false;
	int r;

	if (p->chunk_relocs_idx == -1) {
		return 0;
	}
	chunk = &p->chunks[p->chunk_relocs_idx];
	p->dma_reloc_idx = 0;
	/* FIXME: we assume that each relocs use 4 dwords */
	p->nrelocs = chunk->length_dw / 4;
	p->relocs_ptr = kcalloc(p->nrelocs, sizeof(void *), GFP_KERNEL);
	if (p->relocs_ptr == NULL) {
		return -ENOMEM;
	}
	p->relocs = kcalloc(p->nrelocs, sizeof(struct radeon_cs_reloc), GFP_KERNEL);
	if (p->relocs == NULL) {
		return -ENOMEM;
	}

	radeon_cs_buckets_init(&buckets);

	for (i = 0; i < p->nrelocs; i++) {
		struct drm_radeon_cs_reloc *r;
		unsigned priority;

		duplicate = false;
		r = (struct drm_radeon_cs_reloc *)&chunk->kdata[i*4];
		for (j = 0; j < i; j++) {
			if (r->handle == p->relocs[j].handle) {
				p->relocs_ptr[i] = &p->relocs[j];
				duplicate = true;
				break;
			}
		}
		if (duplicate) {
			p->relocs[i].handle = 0;
			continue;
		}

		p->relocs[i].gobj = drm_gem_object_lookup(ddev, p->filp,
							  r->handle);
		if (p->relocs[i].gobj == NULL) {
			DRM_ERROR("gem object lookup failed 0x%x\n",
				  r->handle);
			return -ENOENT;
		}
		p->relocs_ptr[i] = &p->relocs[i];
		p->relocs[i].robj = gem_to_radeon_bo(p->relocs[i].gobj);

		/* The userspace buffer priorities are from 0 to 15. A higher
		 * number means the buffer is more important.
		 * Also, the buffers used for write have a higher priority than
		 * the buffers used for read only, which doubles the range
		 * to 0 to 31. 32 is reserved for the kernel driver.
		 */
		priority = (r->flags & RADEON_RELOC_PRIO_MASK) * 2
			   + !!r->write_domain;

		/* the first reloc of an UVD job is the msg and that must be in
		   VRAM, also put everything into VRAM on AGP cards and older
		   IGP chips to avoid image corruption */
		if (p->ring == R600_RING_TYPE_UVD_INDEX &&
		    (i == 0 || drm_pci_device_is_agp(p->rdev->ddev) ||
		     p->rdev->family == CHIP_RS780 ||
		     p->rdev->family == CHIP_RS880)) {

			/* TODO: is this still needed for NI+ ? */
			p->relocs[i].prefered_domains =
				RADEON_GEM_DOMAIN_VRAM;

			p->relocs[i].allowed_domains =
				RADEON_GEM_DOMAIN_VRAM;

			/* prioritize this over any other relocation */
			priority = RADEON_CS_MAX_PRIORITY;
		} else {
			uint32_t domain = r->write_domain ?
				r->write_domain : r->read_domains;

			if (domain & RADEON_GEM_DOMAIN_CPU) {
				DRM_ERROR("RADEON_GEM_DOMAIN_CPU is not valid "
					  "for command submission\n");
				return -EINVAL;
			}

			p->relocs[i].prefered_domains = domain;
			if (domain == RADEON_GEM_DOMAIN_VRAM)
				domain |= RADEON_GEM_DOMAIN_GTT;
			p->relocs[i].allowed_domains = domain;
		}

		if (radeon_ttm_tt_has_userptr(p->relocs[i].robj->tbo.ttm)) {
			uint32_t domain = p->relocs[i].prefered_domains;
			if (!(domain & RADEON_GEM_DOMAIN_GTT)) {
				DRM_ERROR("Only RADEON_GEM_DOMAIN_GTT is "
					  "allowed for userptr BOs\n");
				return -EINVAL;
			}
			need_mmap_lock = true;
			domain = RADEON_GEM_DOMAIN_GTT;
			p->relocs[i].prefered_domains = domain;
			p->relocs[i].allowed_domains = domain;
		}

		p->relocs[i].tv.bo = &p->relocs[i].robj->tbo;
		p->relocs[i].tv.shared = !r->write_domain;
		p->relocs[i].handle = r->handle;

		radeon_cs_buckets_add(&buckets, &p->relocs[i].tv.head,
				      priority);
	}

	radeon_cs_buckets_get_list(&buckets, &p->validated);

	if (p->cs_flags & RADEON_CS_USE_VM)
		p->vm_bos = radeon_vm_get_bos(p->rdev, p->ib.vm,
					      &p->validated);
	if (need_mmap_lock)
		down_read(&current->mm->mmap_sem);

	r = radeon_bo_list_validate(p->rdev, &p->ticket, &p->validated, p->ring);

	if (need_mmap_lock)
		up_read(&current->mm->mmap_sem);

	return r;
}

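/**
 * radeon_cs_get_ring() - map a userspace ring id to a hardware ring index
 * @p: parser structure holding parsing context.
 * @ring: RADEON_CS_RING_* id requested by userspace
 * @priority: requested submission priority
 *
 * Picks the hardware ring for the submission based on the requested ring,
 * the chip family and the priority. Returns -EINVAL for unknown or
 * unsupported rings.
 **/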
static int radeon_cs_get_ring(struct radeon_cs_parser *p, u32 ring, s32 priority)
{
	p->priority = priority;

	switch (ring) {
	default:
		DRM_ERROR("unknown ring id: %d\n", ring);
		return -EINVAL;
	case RADEON_CS_RING_GFX:
		p->ring = RADEON_RING_TYPE_GFX_INDEX;
		break;
	case RADEON_CS_RING_COMPUTE:
		if (p->rdev->family >= CHIP_TAHITI) {
			if (p->priority > 0)
				p->ring = CAYMAN_RING_TYPE_CP1_INDEX;
			else
				p->ring = CAYMAN_RING_TYPE_CP2_INDEX;
		} else
			p->ring = RADEON_RING_TYPE_GFX_INDEX;
		break;
	case RADEON_CS_RING_DMA:
		if (p->rdev->family >= CHIP_CAYMAN) {
			if (p->priority > 0)
				p->ring = R600_RING_TYPE_DMA_INDEX;
			else
				p->ring = CAYMAN_RING_TYPE_DMA1_INDEX;
		} else if (p->rdev->family >= CHIP_RV770) {
			p->ring = R600_RING_TYPE_DMA_INDEX;
		} else {
			return -EINVAL;
		}
		break;
	case RADEON_CS_RING_UVD:
		p->ring = R600_RING_TYPE_UVD_INDEX;
		break;
	case RADEON_CS_RING_VCE:
		/* TODO: only use the low priority ring for now */
		p->ring = TN_RING_TYPE_VCE1_INDEX;
		break;
	}
	return 0;
}

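/**
 * radeon_cs_sync_rings() - make the IB wait for the reloc buffer fences
 * @p: parser structure holding parsing context.
 *
 * Adds the fences from each relocated buffer's reservation object to the
 * IB's semaphore, so the submission waits for prior work on those buffers
 * to complete first.
 **/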
static void radeon_cs_sync_rings(struct radeon_cs_parser *p)
{
	int i;

	for (i = 0; i < p->nrelocs; i++) {
		struct reservation_object *resv;

		if (!p->relocs[i].robj)
			continue;

		resv = p->relocs[i].robj->tbo.resv;
		radeon_semaphore_sync_resv(p->ib.semaphore, resv,
					   p->relocs[i].tv.shared);
	}
}

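/**
 * radeon_cs_parser_init() - copy in and sanity check the CS chunks
 * @p: parser structure holding parsing context.
 * @data: ioctl argument (struct drm_radeon_cs)
 *
 * Copies the chunk array from userspace, records the indices of the IB,
 * reloc, const IB and flags chunks, rejects zero length chunks and selects
 * the target ring from the flags chunk.
 **/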
/* XXX: note that this is called from the legacy UMS CS ioctl as well */
int radeon_cs_parser_init(struct radeon_cs_parser *p, void *data)
{
	struct drm_radeon_cs *cs = data;
	uint64_t *chunk_array_ptr;
	unsigned size, i;
	u32 ring = RADEON_CS_RING_GFX;
	s32 priority = 0;

	if (!cs->num_chunks) {
		return 0;
	}
	/* get chunks */
	INIT_LIST_HEAD(&p->validated);
	p->idx = 0;
	p->ib.sa_bo = NULL;
	p->ib.semaphore = NULL;
	p->const_ib.sa_bo = NULL;
	p->const_ib.semaphore = NULL;
	p->chunk_ib_idx = -1;
	p->chunk_relocs_idx = -1;
	p->chunk_flags_idx = -1;
	p->chunk_const_ib_idx = -1;
	p->chunks_array = kcalloc(cs->num_chunks, sizeof(uint64_t), GFP_KERNEL);
	if (p->chunks_array == NULL) {
		return -ENOMEM;
	}
	chunk_array_ptr = (uint64_t *)(unsigned long)(cs->chunks);
	if (copy_from_user(p->chunks_array, chunk_array_ptr,
			   sizeof(uint64_t)*cs->num_chunks)) {
		return -EFAULT;
	}
	p->cs_flags = 0;
	p->nchunks = cs->num_chunks;
	p->chunks = kcalloc(p->nchunks, sizeof(struct radeon_cs_chunk), GFP_KERNEL);
	if (p->chunks == NULL) {
		return -ENOMEM;
	}
	for (i = 0; i < p->nchunks; i++) {
		struct drm_radeon_cs_chunk __user **chunk_ptr = NULL;
		struct drm_radeon_cs_chunk user_chunk;
		uint32_t __user *cdata;

		chunk_ptr = (void __user*)(unsigned long)p->chunks_array[i];
		if (copy_from_user(&user_chunk, chunk_ptr,
				   sizeof(struct drm_radeon_cs_chunk))) {
			return -EFAULT;
		}
		p->chunks[i].length_dw = user_chunk.length_dw;
		p->chunks[i].chunk_id = user_chunk.chunk_id;
		if (p->chunks[i].chunk_id == RADEON_CHUNK_ID_RELOCS) {
			p->chunk_relocs_idx = i;
		}
		if (p->chunks[i].chunk_id == RADEON_CHUNK_ID_IB) {
			p->chunk_ib_idx = i;
			/* zero length IB isn't useful */
			if (p->chunks[i].length_dw == 0)
				return -EINVAL;
		}
		if (p->chunks[i].chunk_id == RADEON_CHUNK_ID_CONST_IB) {
			p->chunk_const_ib_idx = i;
			/* zero length CONST IB isn't useful */
			if (p->chunks[i].length_dw == 0)
				return -EINVAL;
		}
		if (p->chunks[i].chunk_id == RADEON_CHUNK_ID_FLAGS) {
			p->chunk_flags_idx = i;
			/* zero length flags aren't useful */
			if (p->chunks[i].length_dw == 0)
				return -EINVAL;
		}

		size = p->chunks[i].length_dw;
		cdata = (void __user *)(unsigned long)user_chunk.chunk_data;
		p->chunks[i].user_ptr = cdata;
		if (p->chunks[i].chunk_id == RADEON_CHUNK_ID_CONST_IB)
			continue;

		if (p->chunks[i].chunk_id == RADEON_CHUNK_ID_IB) {
			if (!p->rdev || !(p->rdev->flags & RADEON_IS_AGP))
				continue;
		}

		p->chunks[i].kdata = drm_malloc_ab(size, sizeof(uint32_t));
		size *= sizeof(uint32_t);
		if (p->chunks[i].kdata == NULL) {
			return -ENOMEM;
		}
		if (copy_from_user(p->chunks[i].kdata, cdata, size)) {
			return -EFAULT;
		}
		if (p->chunks[i].chunk_id == RADEON_CHUNK_ID_FLAGS) {
			p->cs_flags = p->chunks[i].kdata[0];
			if (p->chunks[i].length_dw > 1)
				ring = p->chunks[i].kdata[1];
			if (p->chunks[i].length_dw > 2)
				priority = (s32)p->chunks[i].kdata[2];
		}
	}

	/* these are KMS only */
	if (p->rdev) {
		if ((p->cs_flags & RADEON_CS_USE_VM) &&
		    !p->rdev->vm_manager.enabled) {
			DRM_ERROR("VM not active on asic!\n");
			return -EINVAL;
		}

		if (radeon_cs_get_ring(p, ring, priority))
			return -EINVAL;

		/* we only support VM on some SI+ rings */
		if ((p->cs_flags & RADEON_CS_USE_VM) == 0) {
			if (p->rdev->asic->ring[p->ring]->cs_parse == NULL) {
				DRM_ERROR("Ring %d requires VM!\n", p->ring);
				return -EINVAL;
			}
		} else {
			if (p->rdev->asic->ring[p->ring]->ib_parse == NULL) {
				DRM_ERROR("VM not supported on ring %d!\n",
					  p->ring);
				return -EINVAL;
			}
		}
	}

	return 0;
}

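/**
 * cmp_size_smaller_first() - list_sort() comparator ordering BOs by size
 * @priv: unused private data from list_sort()
 * @a: first entry, embedded in a radeon_cs_reloc
 * @b: second entry
 **/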
static int cmp_size_smaller_first(void *priv, struct list_head *a,
				  struct list_head *b)
{
	struct radeon_cs_reloc *la = list_entry(a, struct radeon_cs_reloc, tv.head);
	struct radeon_cs_reloc *lb = list_entry(b, struct radeon_cs_reloc, tv.head);

	/* Sort A before B if A is smaller. */
	return (int)la->robj->tbo.num_pages - (int)lb->robj->tbo.num_pages;
}

/**
 * radeon_cs_parser_fini() - clean parser states
 * @parser: parser structure holding parsing context.
 * @error: error number
 * @backoff: whether to back off the reservations on error
 *
 * If error is set, unvalidate the buffers; otherwise just free the memory
 * used by the parsing context.
 **/
static void radeon_cs_parser_fini(struct radeon_cs_parser *parser, int error, bool backoff)
{
	unsigned i;

	if (!error) {
		/* Sort the buffer list from the smallest to largest buffer,
		 * which affects the order of buffers in the LRU list.
		 * This assures that the smallest buffers are added first
		 * to the LRU list, so they are likely to be later evicted
		 * first, instead of large buffers whose eviction is more
		 * expensive.
		 *
		 * This slightly lowers the number of bytes moved by TTM
		 * per frame under memory pressure.
		 */
		list_sort(NULL, &parser->validated, cmp_size_smaller_first);

		ttm_eu_fence_buffer_objects(&parser->ticket,
					    &parser->validated,
					    &parser->ib.fence->base);
	} else if (backoff) {
		ttm_eu_backoff_reservation(&parser->ticket,
					   &parser->validated);
	}

	if (parser->relocs != NULL) {
		for (i = 0; i < parser->nrelocs; i++) {
			if (parser->relocs[i].gobj)
				drm_gem_object_unreference_unlocked(parser->relocs[i].gobj);
		}
	}
	kfree(parser->track);
	kfree(parser->relocs);
	kfree(parser->relocs_ptr);
	kfree(parser->vm_bos);
	for (i = 0; i < parser->nchunks; i++)
		drm_free_large(parser->chunks[i].kdata);
	kfree(parser->chunks);
	kfree(parser->chunks_array);
	radeon_ib_free(parser->rdev, &parser->ib);
	radeon_ib_free(parser->rdev, &parser->const_ib);
}

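/**
 * radeon_cs_ib_chunk() - check and schedule a non-VM IB
 * @rdev: radeon_device pointer
 * @parser: parser structure holding parsing context.
 *
 * For submissions without a VM, runs the ring specific CS checker over
 * the IB, syncs against the reloc buffer fences and schedules the IB.
 **/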
static int radeon_cs_ib_chunk(struct radeon_device *rdev,
			      struct radeon_cs_parser *parser)
{
	int r;

	if (parser->chunk_ib_idx == -1)
		return 0;

	if (parser->cs_flags & RADEON_CS_USE_VM)
		return 0;

	r = radeon_cs_parse(rdev, parser->ring, parser);
	if (r || parser->parser_error) {
		DRM_ERROR("Invalid command stream !\n");
		return r;
	}

	if (parser->ring == R600_RING_TYPE_UVD_INDEX)
		radeon_uvd_note_usage(rdev);
	else if ((parser->ring == TN_RING_TYPE_VCE1_INDEX) ||
		 (parser->ring == TN_RING_TYPE_VCE2_INDEX))
		radeon_vce_note_usage(rdev);

	radeon_cs_sync_rings(parser);
	r = radeon_ib_schedule(rdev, &parser->ib, NULL, true);
	if (r) {
		DRM_ERROR("Failed to schedule IB !\n");
	}
	return r;
}

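/**
 * radeon_bo_vm_update_pte() - update the page tables for all CS buffers
 * @p: parser structure holding parsing context.
 * @vm: VM the submission runs in
 *
 * Updates the page directory, clears freed mappings, updates the mapping
 * of the temporary ring BO and of every buffer referenced by the CS, and
 * finally clears any remaining invalid mappings.
 **/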
static int radeon_bo_vm_update_pte(struct radeon_cs_parser *p,
				   struct radeon_vm *vm)
{
	struct radeon_device *rdev = p->rdev;
	struct radeon_bo_va *bo_va;
	int i, r;

	r = radeon_vm_update_page_directory(rdev, vm);
	if (r)
		return r;

	r = radeon_vm_clear_freed(rdev, vm);
	if (r)
		return r;

	if (vm->ib_bo_va == NULL) {
		DRM_ERROR("Tmp BO not in VM!\n");
		return -EINVAL;
	}

	r = radeon_vm_bo_update(rdev, vm->ib_bo_va,
				&rdev->ring_tmp_bo.bo->tbo.mem);
	if (r)
		return r;

	for (i = 0; i < p->nrelocs; i++) {
		struct radeon_bo *bo;

		/* ignore duplicates */
		if (p->relocs_ptr[i] != &p->relocs[i])
			continue;

		bo = p->relocs[i].robj;
		bo_va = radeon_vm_bo_find(vm, bo);
		if (bo_va == NULL) {
			dev_err(rdev->dev, "bo %p not in vm %p\n", bo, vm);
			return -EINVAL;
		}

		r = radeon_vm_bo_update(rdev, bo_va, &bo->tbo.mem);
		if (r)
			return r;
	}

	return radeon_vm_clear_invalids(rdev, vm);
}

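/**
 * radeon_cs_ib_vm_chunk() - check and schedule a VM IB
 * @rdev: radeon_device pointer
 * @parser: parser structure holding parsing context.
 *
 * For VM submissions, runs the ring specific IB checker over the const IB
 * and the IB, updates the VM page tables, syncs against the reloc buffer
 * fences and the VM fence, and schedules the IB(s).
 **/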
static int radeon_cs_ib_vm_chunk(struct radeon_device *rdev,
				 struct radeon_cs_parser *parser)
{
	struct radeon_fpriv *fpriv = parser->filp->driver_priv;
	struct radeon_vm *vm = &fpriv->vm;
	int r;

	if (parser->chunk_ib_idx == -1)
		return 0;
	if ((parser->cs_flags & RADEON_CS_USE_VM) == 0)
		return 0;

	if (parser->const_ib.length_dw) {
		r = radeon_ring_ib_parse(rdev, parser->ring, &parser->const_ib);
		if (r) {
			return r;
		}
	}

	r = radeon_ring_ib_parse(rdev, parser->ring, &parser->ib);
	if (r) {
		return r;
	}

	if (parser->ring == R600_RING_TYPE_UVD_INDEX)
		radeon_uvd_note_usage(rdev);

	mutex_lock(&vm->mutex);
	r = radeon_bo_vm_update_pte(parser, vm);
	if (r) {
		goto out;
	}
	radeon_cs_sync_rings(parser);
	radeon_semaphore_sync_fence(parser->ib.semaphore, vm->fence);

	if ((rdev->family >= CHIP_TAHITI) &&
	    (parser->chunk_const_ib_idx != -1)) {
		r = radeon_ib_schedule(rdev, &parser->ib, &parser->const_ib, true);
	} else {
		r = radeon_ib_schedule(rdev, &parser->ib, NULL, true);
	}

out:
	mutex_unlock(&vm->mutex);
	return r;
}

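/**
 * radeon_cs_handle_lockup() - handle a deadlock detected during the CS
 * @rdev: radeon_device pointer
 * @r: error code from the earlier CS stage
 *
 * On -EDEADLK the GPU is reset and the error is converted to -EAGAIN so
 * userspace retries the submission.
 **/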
static int radeon_cs_handle_lockup(struct radeon_device *rdev, int r)
{
	if (r == -EDEADLK) {
		r = radeon_gpu_reset(rdev);
		if (!r)
			r = -EAGAIN;
	}
	return r;
}

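/**
 * radeon_cs_ib_fill() - allocate the IB(s) and copy in the command words
 * @rdev: radeon_device pointer
 * @parser: parser structure holding parsing context.
 *
 * Allocates the IB, and the const IB for SI+ VM submissions, checks the
 * chunk sizes against RADEON_IB_VM_MAX_SIZE and copies the command words
 * from userspace or from the already copied kdata.
 **/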
static int radeon_cs_ib_fill(struct radeon_device *rdev, struct radeon_cs_parser *parser)
{
	struct radeon_cs_chunk *ib_chunk;
	struct radeon_vm *vm = NULL;
	int r;

	if (parser->chunk_ib_idx == -1)
		return 0;

	if (parser->cs_flags & RADEON_CS_USE_VM) {
		struct radeon_fpriv *fpriv = parser->filp->driver_priv;
		vm = &fpriv->vm;

		if ((rdev->family >= CHIP_TAHITI) &&
		    (parser->chunk_const_ib_idx != -1)) {
			ib_chunk = &parser->chunks[parser->chunk_const_ib_idx];
			if (ib_chunk->length_dw > RADEON_IB_VM_MAX_SIZE) {
				DRM_ERROR("cs IB CONST too big: %d\n", ib_chunk->length_dw);
				return -EINVAL;
			}
			r = radeon_ib_get(rdev, parser->ring, &parser->const_ib,
					  vm, ib_chunk->length_dw * 4);
			if (r) {
				DRM_ERROR("Failed to get const ib !\n");
				return r;
			}
			parser->const_ib.is_const_ib = true;
			parser->const_ib.length_dw = ib_chunk->length_dw;
			if (copy_from_user(parser->const_ib.ptr,
					   ib_chunk->user_ptr,
					   ib_chunk->length_dw * 4))
				return -EFAULT;
		}

		ib_chunk = &parser->chunks[parser->chunk_ib_idx];
		if (ib_chunk->length_dw > RADEON_IB_VM_MAX_SIZE) {
			DRM_ERROR("cs IB too big: %d\n", ib_chunk->length_dw);
			return -EINVAL;
		}
	}
	ib_chunk = &parser->chunks[parser->chunk_ib_idx];

	r = radeon_ib_get(rdev, parser->ring, &parser->ib,
			  vm, ib_chunk->length_dw * 4);
	if (r) {
		DRM_ERROR("Failed to get ib !\n");
		return r;
	}
	parser->ib.length_dw = ib_chunk->length_dw;
	if (ib_chunk->kdata)
		memcpy(parser->ib.ptr, ib_chunk->kdata, ib_chunk->length_dw * 4);
	else if (copy_from_user(parser->ib.ptr, ib_chunk->user_ptr, ib_chunk->length_dw * 4))
		return -EFAULT;
	return 0;
}

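/**
 * radeon_cs_ioctl() - the main command submission ioctl
 * @dev: drm device
 * @data: ioctl argument (struct drm_radeon_cs)
 * @filp: drm file of the caller
 *
 * Initializes the parser, fills the IB(s), validates the relocations and
 * schedules the command stream on the selected ring, handling GPU resets
 * along the way.
 **/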
int radeon_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
{
	struct radeon_device *rdev = dev->dev_private;
	struct radeon_cs_parser parser;
	int r;

	down_read(&rdev->exclusive_lock);
	if (!rdev->accel_working) {
		up_read(&rdev->exclusive_lock);
		return -EBUSY;
	}
	if (rdev->in_reset) {
		up_read(&rdev->exclusive_lock);
		r = radeon_gpu_reset(rdev);
		if (!r)
			r = -EAGAIN;
		return r;
	}
	/* initialize parser */
	memset(&parser, 0, sizeof(struct radeon_cs_parser));
	parser.filp = filp;
	parser.rdev = rdev;
	parser.dev = rdev->dev;
	parser.family = rdev->family;
	r = radeon_cs_parser_init(&parser, data);
	if (r) {
		DRM_ERROR("Failed to initialize parser !\n");
		radeon_cs_parser_fini(&parser, r, false);
		up_read(&rdev->exclusive_lock);
		r = radeon_cs_handle_lockup(rdev, r);
		return r;
	}

	r = radeon_cs_ib_fill(rdev, &parser);
	if (!r) {
		r = radeon_cs_parser_relocs(&parser);
		if (r && r != -ERESTARTSYS)
			DRM_ERROR("Failed to parse relocation %d!\n", r);
	}

	if (r) {
		radeon_cs_parser_fini(&parser, r, false);
		up_read(&rdev->exclusive_lock);
		r = radeon_cs_handle_lockup(rdev, r);
		return r;
	}

	trace_radeon_cs(&parser);

	r = radeon_cs_ib_chunk(rdev, &parser);
	if (r) {
		goto out;
	}
	r = radeon_cs_ib_vm_chunk(rdev, &parser);
	if (r) {
		goto out;
	}
out:
	radeon_cs_parser_fini(&parser, r, true);
	up_read(&rdev->exclusive_lock);
	r = radeon_cs_handle_lockup(rdev, r);
	return r;
}

/**
 * radeon_cs_packet_parse() - parse cp packet and point ib index to next packet
 * @p: parser structure holding parsing context.
 * @pkt: where to store packet information
 * @idx: index of the packet header in the ib
 *
 * Assume that chunk_ib_index is properly set. Will return -EINVAL
 * if the packet is bigger than the remaining ib size, or if the packet
 * type is unknown.
 **/
int radeon_cs_packet_parse(struct radeon_cs_parser *p,
			   struct radeon_cs_packet *pkt,
			   unsigned idx)
{
	struct radeon_cs_chunk *ib_chunk = &p->chunks[p->chunk_ib_idx];
	struct radeon_device *rdev = p->rdev;
	uint32_t header;

	if (idx >= ib_chunk->length_dw) {
		DRM_ERROR("Can not parse packet at %d after CS end %d !\n",
			  idx, ib_chunk->length_dw);
		return -EINVAL;
	}
	header = radeon_get_ib_value(p, idx);
	pkt->idx = idx;
	pkt->type = RADEON_CP_PACKET_GET_TYPE(header);
	pkt->count = RADEON_CP_PACKET_GET_COUNT(header);
	pkt->one_reg_wr = 0;
	switch (pkt->type) {
	case RADEON_PACKET_TYPE0:
		if (rdev->family < CHIP_R600) {
			pkt->reg = R100_CP_PACKET0_GET_REG(header);
			pkt->one_reg_wr =
				RADEON_CP_PACKET0_GET_ONE_REG_WR(header);
		} else
			pkt->reg = R600_CP_PACKET0_GET_REG(header);
		break;
	case RADEON_PACKET_TYPE3:
		pkt->opcode = RADEON_CP_PACKET3_GET_OPCODE(header);
		break;
	case RADEON_PACKET_TYPE2:
		pkt->count = -1;
		break;
	default:
		DRM_ERROR("Unknown packet type %d at %d !\n", pkt->type, idx);
		return -EINVAL;
	}
	if ((pkt->count + 1 + pkt->idx) >= ib_chunk->length_dw) {
		DRM_ERROR("Packet (%d:%d:%d) end after CS buffer (%d) !\n",
			  pkt->idx, pkt->type, pkt->count, ib_chunk->length_dw);
		return -EINVAL;
	}
	return 0;
}

/**
 * radeon_cs_packet_next_is_pkt3_nop() - test if the next packet is P3 NOP
 * @p: structure holding the parser context.
 *
 * Check if the next packet is NOP relocation packet3.
 **/
bool radeon_cs_packet_next_is_pkt3_nop(struct radeon_cs_parser *p)
{
	struct radeon_cs_packet p3reloc;
	int r;

	r = radeon_cs_packet_parse(p, &p3reloc, p->idx);
	if (r)
		return false;
	if (p3reloc.type != RADEON_PACKET_TYPE3)
		return false;
	if (p3reloc.opcode != RADEON_PACKET3_NOP)
		return false;
	return true;
}

/**
 * radeon_cs_dump_packet() - dump raw packet context
 * @p: structure holding the parser context.
 * @pkt: structure holding the packet.
 *
 * Used mostly for debugging and error reporting.
 **/
void radeon_cs_dump_packet(struct radeon_cs_parser *p,
			   struct radeon_cs_packet *pkt)
{
	volatile uint32_t *ib;
	unsigned i;
	unsigned idx;

	ib = p->ib.ptr;
	idx = pkt->idx;
	for (i = 0; i <= (pkt->count + 1); i++, idx++)
		DRM_INFO("ib[%d]=0x%08X\n", idx, ib[idx]);
}

/**
 * radeon_cs_packet_next_reloc() - parse next (should be reloc) packet
 * @p: parser structure holding parsing context.
 * @cs_reloc: where to store the pointer to the reloc entry
 * @nomm: skip the memory manager lookup and take the GPU offset straight
 *	  from the relocation chunk data
 *
 * Check if the next packet is a relocation packet3 and return the
 * corresponding reloc entry.
 **/
int radeon_cs_packet_next_reloc(struct radeon_cs_parser *p,
				struct radeon_cs_reloc **cs_reloc,
				int nomm)
{
	struct radeon_cs_chunk *relocs_chunk;
	struct radeon_cs_packet p3reloc;
	unsigned idx;
	int r;

	if (p->chunk_relocs_idx == -1) {
		DRM_ERROR("No relocation chunk !\n");
		return -EINVAL;
	}
	*cs_reloc = NULL;
	relocs_chunk = &p->chunks[p->chunk_relocs_idx];
	r = radeon_cs_packet_parse(p, &p3reloc, p->idx);
	if (r)
		return r;
	p->idx += p3reloc.count + 2;
	if (p3reloc.type != RADEON_PACKET_TYPE3 ||
	    p3reloc.opcode != RADEON_PACKET3_NOP) {
		DRM_ERROR("No packet3 for relocation for packet at %d.\n",
			  p3reloc.idx);
		radeon_cs_dump_packet(p, &p3reloc);
		return -EINVAL;
	}
	idx = radeon_get_ib_value(p, p3reloc.idx + 1);
	if (idx >= relocs_chunk->length_dw) {
		DRM_ERROR("Relocs at %d after relocations chunk end %d !\n",
			  idx, relocs_chunk->length_dw);
		radeon_cs_dump_packet(p, &p3reloc);
		return -EINVAL;
	}
	/* FIXME: we assume reloc size is 4 dwords */
	if (nomm) {
		*cs_reloc = p->relocs;
		(*cs_reloc)->gpu_offset =
			(u64)relocs_chunk->kdata[idx + 3] << 32;
		(*cs_reloc)->gpu_offset |= relocs_chunk->kdata[idx + 0];
	} else
		*cs_reloc = p->relocs_ptr[(idx / 4)];
	return 0;
}