/*
 * Copyright 2008 Jerome Glisse.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *    Jerome Glisse <glisse@freedesktop.org>
 */
#include <linux/list_sort.h>
#include <drm/drmP.h>
#include <drm/radeon_drm.h>
#include "radeon_reg.h"
#include "radeon.h"
#include "radeon_trace.h"

#define RADEON_CS_MAX_PRIORITY	32u
#define RADEON_CS_NUM_BUCKETS	(RADEON_CS_MAX_PRIORITY + 1)

/* This is based on the bucket sort with O(n) time complexity.
 * An item with priority "i" is added to bucket[i]. The lists are then
 * concatenated in descending order.
 */
struct radeon_cs_buckets {
        struct list_head bucket[RADEON_CS_NUM_BUCKETS];
};

static void radeon_cs_buckets_init(struct radeon_cs_buckets *b)
{
        unsigned i;

        for (i = 0; i < RADEON_CS_NUM_BUCKETS; i++)
                INIT_LIST_HEAD(&b->bucket[i]);
}

static void radeon_cs_buckets_add(struct radeon_cs_buckets *b,
                                  struct list_head *item, unsigned priority)
{
        /* Since buffers which appear sooner in the relocation list are
         * likely to be used more often than buffers which appear later
         * in the list, the sort mustn't change the ordering of buffers
         * with the same priority, i.e. it must be stable.
         */
        list_add_tail(item, &b->bucket[min(priority, RADEON_CS_MAX_PRIORITY)]);
}

static void radeon_cs_buckets_get_list(struct radeon_cs_buckets *b,
                                       struct list_head *out_list)
{
        unsigned i;

        /* Connect the sorted buckets in the output list. */
        for (i = 0; i < RADEON_CS_NUM_BUCKETS; i++) {
                list_splice(&b->bucket[i], out_list);
        }
}

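/* Build the list of buffer objects referenced by the relocation chunk:
 * look up each GEM handle, pick the preferred and allowed domains, sort
 * the BOs by priority and hand the resulting list to TTM for validation.
 */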
static int radeon_cs_parser_relocs(struct radeon_cs_parser *p)
{
        struct drm_device *ddev = p->rdev->ddev;
        struct radeon_cs_chunk *chunk;
        struct radeon_cs_buckets buckets;
        unsigned i;
        bool need_mmap_lock = false;
        int r;

        if (p->chunk_relocs == NULL) {
                return 0;
        }
        chunk = p->chunk_relocs;
        p->dma_reloc_idx = 0;
        /* FIXME: we assume that each reloc uses 4 dwords */
        p->nrelocs = chunk->length_dw / 4;
        p->relocs = drm_calloc_large(p->nrelocs, sizeof(struct radeon_bo_list));
        if (p->relocs == NULL) {
                return -ENOMEM;
        }

        radeon_cs_buckets_init(&buckets);

        for (i = 0; i < p->nrelocs; i++) {
                struct drm_radeon_cs_reloc *r;
                struct drm_gem_object *gobj;
                unsigned priority;

                r = (struct drm_radeon_cs_reloc *)&chunk->kdata[i*4];
                gobj = drm_gem_object_lookup(ddev, p->filp, r->handle);
                if (gobj == NULL) {
                        DRM_ERROR("gem object lookup failed 0x%x\n",
                                  r->handle);
                        return -ENOENT;
                }
                p->relocs[i].robj = gem_to_radeon_bo(gobj);

                /* The userspace buffer priorities are from 0 to 15. A higher
                 * number means the buffer is more important.
                 * Also, the buffers used for write have a higher priority than
                 * the buffers used for read only, which doubles the range
                 * to 0 to 31. 32 is reserved for the kernel driver.
                 */
                priority = (r->flags & RADEON_RELOC_PRIO_MASK) * 2
                           + !!r->write_domain;

                /* The first reloc of an UVD job is the msg and that must be
                 * in VRAM; also put everything into VRAM on AGP cards and
                 * older IGP chips to avoid image corruption.
                 */
                if (p->ring == R600_RING_TYPE_UVD_INDEX &&
                    (i == 0 || drm_pci_device_is_agp(p->rdev->ddev) ||
                     p->rdev->family == CHIP_RS780 ||
                     p->rdev->family == CHIP_RS880)) {

                        /* TODO: is this still needed for NI+ ? */
                        p->relocs[i].prefered_domains =
                                RADEON_GEM_DOMAIN_VRAM;

                        p->relocs[i].allowed_domains =
                                RADEON_GEM_DOMAIN_VRAM;

                        /* prioritize this over any other relocation */
                        priority = RADEON_CS_MAX_PRIORITY;
                } else {
                        uint32_t domain = r->write_domain ?
                                r->write_domain : r->read_domains;

                        if (domain & RADEON_GEM_DOMAIN_CPU) {
                                DRM_ERROR("RADEON_GEM_DOMAIN_CPU is not valid "
                                          "for command submission\n");
                                return -EINVAL;
                        }

                        p->relocs[i].prefered_domains = domain;
                        if (domain == RADEON_GEM_DOMAIN_VRAM)
                                domain |= RADEON_GEM_DOMAIN_GTT;
                        p->relocs[i].allowed_domains = domain;
                }

                if (radeon_ttm_tt_has_userptr(p->relocs[i].robj->tbo.ttm)) {
                        uint32_t domain = p->relocs[i].prefered_domains;
                        if (!(domain & RADEON_GEM_DOMAIN_GTT)) {
                                DRM_ERROR("Only RADEON_GEM_DOMAIN_GTT is "
                                          "allowed for userptr BOs\n");
                                return -EINVAL;
                        }
                        need_mmap_lock = true;
                        domain = RADEON_GEM_DOMAIN_GTT;
                        p->relocs[i].prefered_domains = domain;
                        p->relocs[i].allowed_domains = domain;
                }

                p->relocs[i].tv.bo = &p->relocs[i].robj->tbo;
                p->relocs[i].tv.shared = !r->write_domain;

                radeon_cs_buckets_add(&buckets, &p->relocs[i].tv.head,
                                      priority);
        }

        radeon_cs_buckets_get_list(&buckets, &p->validated);

        if (p->cs_flags & RADEON_CS_USE_VM)
                p->vm_bos = radeon_vm_get_bos(p->rdev, p->ib.vm,
                                              &p->validated);
        if (need_mmap_lock)
                down_read(&current->mm->mmap_sem);

        r = radeon_bo_list_validate(p->rdev, &p->ticket, &p->validated, p->ring);

        if (need_mmap_lock)
                up_read(&current->mm->mmap_sem);

        return r;
}

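/* Translate the userspace ring id and priority into a hardware ring index. */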
static int radeon_cs_get_ring(struct radeon_cs_parser *p, u32 ring, s32 priority)
{
        p->priority = priority;

        switch (ring) {
        default:
                DRM_ERROR("unknown ring id: %d\n", ring);
                return -EINVAL;
        case RADEON_CS_RING_GFX:
                p->ring = RADEON_RING_TYPE_GFX_INDEX;
                break;
        case RADEON_CS_RING_COMPUTE:
                if (p->rdev->family >= CHIP_TAHITI) {
                        if (p->priority > 0)
                                p->ring = CAYMAN_RING_TYPE_CP1_INDEX;
                        else
                                p->ring = CAYMAN_RING_TYPE_CP2_INDEX;
                } else
                        p->ring = RADEON_RING_TYPE_GFX_INDEX;
                break;
        case RADEON_CS_RING_DMA:
                if (p->rdev->family >= CHIP_CAYMAN) {
                        if (p->priority > 0)
                                p->ring = R600_RING_TYPE_DMA_INDEX;
                        else
                                p->ring = CAYMAN_RING_TYPE_DMA1_INDEX;
                } else if (p->rdev->family >= CHIP_RV770) {
                        p->ring = R600_RING_TYPE_DMA_INDEX;
                } else {
                        return -EINVAL;
                }
                break;
        case RADEON_CS_RING_UVD:
                p->ring = R600_RING_TYPE_UVD_INDEX;
                break;
        case RADEON_CS_RING_VCE:
                /* TODO: only use the low priority ring for now */
                p->ring = TN_RING_TYPE_VCE1_INDEX;
                break;
        }
        return 0;
}

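/* Make the IB wait for earlier work on all validated buffers by collecting
 * the relevant fences from each buffer's reservation object into the IB's
 * sync object.
 */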
static int radeon_cs_sync_rings(struct radeon_cs_parser *p)
{
        struct radeon_bo_list *reloc;
        int r;

        list_for_each_entry(reloc, &p->validated, tv.head) {
                struct reservation_object *resv;

                resv = reloc->robj->tbo.resv;
                r = radeon_sync_resv(p->rdev, &p->ib.sync, resv,
                                     reloc->tv.shared);
                if (r)
                        return r;
        }
        return 0;
}

/* XXX: note that this is called from the legacy UMS CS ioctl as well */
int radeon_cs_parser_init(struct radeon_cs_parser *p, void *data)
{
        struct drm_radeon_cs *cs = data;
        uint64_t *chunk_array_ptr;
        unsigned size, i;
        u32 ring = RADEON_CS_RING_GFX;
        s32 priority = 0;

        INIT_LIST_HEAD(&p->validated);

        if (!cs->num_chunks) {
                return 0;
        }

        /* get chunks */
        p->idx = 0;
        p->ib.sa_bo = NULL;
        p->const_ib.sa_bo = NULL;
        p->chunk_ib = NULL;
        p->chunk_relocs = NULL;
        p->chunk_flags = NULL;
        p->chunk_const_ib = NULL;
        p->chunks_array = kcalloc(cs->num_chunks, sizeof(uint64_t), GFP_KERNEL);
        if (p->chunks_array == NULL) {
                return -ENOMEM;
        }
        chunk_array_ptr = (uint64_t *)(unsigned long)(cs->chunks);
        if (copy_from_user(p->chunks_array, chunk_array_ptr,
                           sizeof(uint64_t)*cs->num_chunks)) {
                return -EFAULT;
        }
        p->cs_flags = 0;
        p->nchunks = cs->num_chunks;
        p->chunks = kcalloc(p->nchunks, sizeof(struct radeon_cs_chunk), GFP_KERNEL);
        if (p->chunks == NULL) {
                return -ENOMEM;
        }
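        /* Copy each chunk descriptor from userspace and classify it by chunk id. */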
        for (i = 0; i < p->nchunks; i++) {
                struct drm_radeon_cs_chunk __user **chunk_ptr = NULL;
                struct drm_radeon_cs_chunk user_chunk;
                uint32_t __user *cdata;

                chunk_ptr = (void __user*)(unsigned long)p->chunks_array[i];
                if (copy_from_user(&user_chunk, chunk_ptr,
                                   sizeof(struct drm_radeon_cs_chunk))) {
                        return -EFAULT;
                }
                p->chunks[i].length_dw = user_chunk.length_dw;
                if (user_chunk.chunk_id == RADEON_CHUNK_ID_RELOCS) {
                        p->chunk_relocs = &p->chunks[i];
                }
                if (user_chunk.chunk_id == RADEON_CHUNK_ID_IB) {
                        p->chunk_ib = &p->chunks[i];
                        /* zero length IB isn't useful */
                        if (p->chunks[i].length_dw == 0)
                                return -EINVAL;
                }
                if (user_chunk.chunk_id == RADEON_CHUNK_ID_CONST_IB) {
                        p->chunk_const_ib = &p->chunks[i];
                        /* zero length CONST IB isn't useful */
                        if (p->chunks[i].length_dw == 0)
                                return -EINVAL;
                }
                if (user_chunk.chunk_id == RADEON_CHUNK_ID_FLAGS) {
                        p->chunk_flags = &p->chunks[i];
                        /* zero length flags aren't useful */
                        if (p->chunks[i].length_dw == 0)
                                return -EINVAL;
                }

                size = p->chunks[i].length_dw;
                cdata = (void __user *)(unsigned long)user_chunk.chunk_data;
                p->chunks[i].user_ptr = cdata;
                if (user_chunk.chunk_id == RADEON_CHUNK_ID_CONST_IB)
                        continue;

                if (user_chunk.chunk_id == RADEON_CHUNK_ID_IB) {
                        if (!p->rdev || !(p->rdev->flags & RADEON_IS_AGP))
                                continue;
                }

                p->chunks[i].kdata = drm_malloc_ab(size, sizeof(uint32_t));
                size *= sizeof(uint32_t);
                if (p->chunks[i].kdata == NULL) {
                        return -ENOMEM;
                }
                if (copy_from_user(p->chunks[i].kdata, cdata, size)) {
                        return -EFAULT;
                }
                if (user_chunk.chunk_id == RADEON_CHUNK_ID_FLAGS) {
                        p->cs_flags = p->chunks[i].kdata[0];
                        if (p->chunks[i].length_dw > 1)
                                ring = p->chunks[i].kdata[1];
                        if (p->chunks[i].length_dw > 2)
                                priority = (s32)p->chunks[i].kdata[2];
                }
        }

        /* these are KMS only */
        if (p->rdev) {
                if ((p->cs_flags & RADEON_CS_USE_VM) &&
                    !p->rdev->vm_manager.enabled) {
                        DRM_ERROR("VM not active on asic!\n");
                        return -EINVAL;
                }

                if (radeon_cs_get_ring(p, ring, priority))
                        return -EINVAL;

                /* we only support VM on some SI+ rings */
                if ((p->cs_flags & RADEON_CS_USE_VM) == 0) {
                        if (p->rdev->asic->ring[p->ring]->cs_parse == NULL) {
                                DRM_ERROR("Ring %d requires VM!\n", p->ring);
                                return -EINVAL;
                        }
                } else {
                        if (p->rdev->asic->ring[p->ring]->ib_parse == NULL) {
                                DRM_ERROR("VM not supported on ring %d!\n",
                                          p->ring);
                                return -EINVAL;
                        }
                }
        }

        return 0;
}

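/* list_sort() comparison callback: order buffer objects by size, smallest first. */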
static int cmp_size_smaller_first(void *priv, struct list_head *a,
                                  struct list_head *b)
{
        struct radeon_bo_list *la = list_entry(a, struct radeon_bo_list, tv.head);
        struct radeon_bo_list *lb = list_entry(b, struct radeon_bo_list, tv.head);

        /* Sort A before B if A is smaller. */
        return (int)la->robj->tbo.num_pages - (int)lb->robj->tbo.num_pages;
}

/**
 * radeon_cs_parser_fini() - clean parser states
 * @parser: parser structure holding parsing context.
 * @error: error number
 * @backoff: indicates whether the buffer reservations should be backed off
 *
 * If error is set, the buffers are unvalidated; otherwise this just frees
 * the memory used by the parsing context.
 **/
static void radeon_cs_parser_fini(struct radeon_cs_parser *parser, int error, bool backoff)
{
        unsigned i;

        if (!error) {
                /* Sort the buffer list from the smallest to largest buffer,
                 * which affects the order of buffers in the LRU list.
                 * This assures that the smallest buffers are added first
                 * to the LRU list, so they are likely to be later evicted
                 * first, instead of large buffers whose eviction is more
                 * expensive.
                 *
                 * This slightly lowers the number of bytes moved by TTM
                 * per frame under memory pressure.
                 */
                list_sort(NULL, &parser->validated, cmp_size_smaller_first);

                ttm_eu_fence_buffer_objects(&parser->ticket,
                                            &parser->validated,
                                            &parser->ib.fence->base);
        } else if (backoff) {
                ttm_eu_backoff_reservation(&parser->ticket,
                                           &parser->validated);
        }

        if (parser->relocs != NULL) {
                for (i = 0; i < parser->nrelocs; i++) {
                        struct radeon_bo *bo = parser->relocs[i].robj;
                        if (bo == NULL)
                                continue;

                        drm_gem_object_unreference_unlocked(&bo->gem_base);
                }
        }
        kfree(parser->track);
        drm_free_large(parser->relocs);
        drm_free_large(parser->vm_bos);
        for (i = 0; i < parser->nchunks; i++)
                drm_free_large(parser->chunks[i].kdata);
        kfree(parser->chunks);
        kfree(parser->chunks_array);
        radeon_ib_free(parser->rdev, &parser->ib);
        radeon_ib_free(parser->rdev, &parser->const_ib);
}

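/* Parse and submit a command stream that does not use a VM: the per-ring
 * cs_parse callback checks and patches the IB before it is scheduled.
 */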
static int radeon_cs_ib_chunk(struct radeon_device *rdev,
                              struct radeon_cs_parser *parser)
{
        int r;

        if (parser->chunk_ib == NULL)
                return 0;

        if (parser->cs_flags & RADEON_CS_USE_VM)
                return 0;

        r = radeon_cs_parse(rdev, parser->ring, parser);
        if (r || parser->parser_error) {
                DRM_ERROR("Invalid command stream !\n");
                return r;
        }

        r = radeon_cs_sync_rings(parser);
        if (r) {
                if (r != -ERESTARTSYS)
                        DRM_ERROR("Failed to sync rings: %i\n", r);
                return r;
        }

        if (parser->ring == R600_RING_TYPE_UVD_INDEX)
                radeon_uvd_note_usage(rdev);
        else if ((parser->ring == TN_RING_TYPE_VCE1_INDEX) ||
                 (parser->ring == TN_RING_TYPE_VCE2_INDEX))
                radeon_vce_note_usage(rdev);

        r = radeon_ib_schedule(rdev, &parser->ib, NULL, true);
        if (r) {
                DRM_ERROR("Failed to schedule IB !\n");
        }
        return r;
}

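/* Bring the VM page tables up to date for every BO referenced by the CS,
 * so the IB sees all buffers at their current GPU virtual addresses.
 */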
static int radeon_bo_vm_update_pte(struct radeon_cs_parser *p,
                                   struct radeon_vm *vm)
{
        struct radeon_device *rdev = p->rdev;
        struct radeon_bo_va *bo_va;
        int i, r;

        r = radeon_vm_update_page_directory(rdev, vm);
        if (r)
                return r;

        r = radeon_vm_clear_freed(rdev, vm);
        if (r)
                return r;

        if (vm->ib_bo_va == NULL) {
                DRM_ERROR("Tmp BO not in VM!\n");
                return -EINVAL;
        }

        r = radeon_vm_bo_update(rdev, vm->ib_bo_va,
                                &rdev->ring_tmp_bo.bo->tbo.mem);
        if (r)
                return r;

        for (i = 0; i < p->nrelocs; i++) {
                struct radeon_bo *bo;

                bo = p->relocs[i].robj;
                bo_va = radeon_vm_bo_find(vm, bo);
                if (bo_va == NULL) {
                        dev_err(rdev->dev, "bo %p not in vm %p\n", bo, vm);
                        return -EINVAL;
                }

                r = radeon_vm_bo_update(rdev, bo_va, &bo->tbo.mem);
                if (r)
                        return r;

                radeon_sync_fence(&p->ib.sync, bo_va->last_pt_update);
        }

        return radeon_vm_clear_invalids(rdev, vm);
}

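/* Parse and submit a command stream that executes in the per-file virtual
 * address space: check the IBs, update the VM page tables and schedule.
 */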
static int radeon_cs_ib_vm_chunk(struct radeon_device *rdev,
                                 struct radeon_cs_parser *parser)
{
        struct radeon_fpriv *fpriv = parser->filp->driver_priv;
        struct radeon_vm *vm = &fpriv->vm;
        int r;

        if (parser->chunk_ib == NULL)
                return 0;
        if ((parser->cs_flags & RADEON_CS_USE_VM) == 0)
                return 0;

        if (parser->const_ib.length_dw) {
                r = radeon_ring_ib_parse(rdev, parser->ring, &parser->const_ib);
                if (r) {
                        return r;
                }
        }

        r = radeon_ring_ib_parse(rdev, parser->ring, &parser->ib);
        if (r) {
                return r;
        }

        if (parser->ring == R600_RING_TYPE_UVD_INDEX)
                radeon_uvd_note_usage(rdev);

        mutex_lock(&vm->mutex);
        r = radeon_bo_vm_update_pte(parser, vm);
        if (r) {
                goto out;
        }

        r = radeon_cs_sync_rings(parser);
        if (r) {
                if (r != -ERESTARTSYS)
                        DRM_ERROR("Failed to sync rings: %i\n", r);
                goto out;
        }

        if ((rdev->family >= CHIP_TAHITI) &&
            (parser->chunk_const_ib != NULL)) {
                r = radeon_ib_schedule(rdev, &parser->ib, &parser->const_ib, true);
        } else {
                r = radeon_ib_schedule(rdev, &parser->ib, NULL, true);
        }

out:
        mutex_unlock(&vm->mutex);
        return r;
}

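/* -EDEADLK means the GPU is locked up: reset it and, if the reset succeeds,
 * return -EAGAIN so userspace resubmits the command stream.
 */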
static int radeon_cs_handle_lockup(struct radeon_device *rdev, int r)
{
        if (r == -EDEADLK) {
                r = radeon_gpu_reset(rdev);
                if (!r)
                        r = -EAGAIN;
        }
        return r;
}

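/* Allocate the IB (and the const IB when present on SI+) and copy the
 * command stream from userspace into it.
 */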
static int radeon_cs_ib_fill(struct radeon_device *rdev, struct radeon_cs_parser *parser)
{
        struct radeon_cs_chunk *ib_chunk;
        struct radeon_vm *vm = NULL;
        int r;

        if (parser->chunk_ib == NULL)
                return 0;

        if (parser->cs_flags & RADEON_CS_USE_VM) {
                struct radeon_fpriv *fpriv = parser->filp->driver_priv;
                vm = &fpriv->vm;

                if ((rdev->family >= CHIP_TAHITI) &&
                    (parser->chunk_const_ib != NULL)) {
                        ib_chunk = parser->chunk_const_ib;
                        if (ib_chunk->length_dw > RADEON_IB_VM_MAX_SIZE) {
                                DRM_ERROR("cs IB CONST too big: %d\n", ib_chunk->length_dw);
                                return -EINVAL;
                        }
                        r = radeon_ib_get(rdev, parser->ring, &parser->const_ib,
                                          vm, ib_chunk->length_dw * 4);
                        if (r) {
                                DRM_ERROR("Failed to get const ib !\n");
                                return r;
                        }
                        parser->const_ib.is_const_ib = true;
                        parser->const_ib.length_dw = ib_chunk->length_dw;
                        if (copy_from_user(parser->const_ib.ptr,
                                           ib_chunk->user_ptr,
                                           ib_chunk->length_dw * 4))
                                return -EFAULT;
                }

                ib_chunk = parser->chunk_ib;
                if (ib_chunk->length_dw > RADEON_IB_VM_MAX_SIZE) {
                        DRM_ERROR("cs IB too big: %d\n", ib_chunk->length_dw);
                        return -EINVAL;
                }
        }
        ib_chunk = parser->chunk_ib;

        r = radeon_ib_get(rdev, parser->ring, &parser->ib,
                          vm, ib_chunk->length_dw * 4);
        if (r) {
                DRM_ERROR("Failed to get ib !\n");
                return r;
        }
        parser->ib.length_dw = ib_chunk->length_dw;
        if (ib_chunk->kdata)
                memcpy(parser->ib.ptr, ib_chunk->kdata, ib_chunk->length_dw * 4);
        else if (copy_from_user(parser->ib.ptr, ib_chunk->user_ptr, ib_chunk->length_dw * 4))
                return -EFAULT;
        return 0;
}

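/* Top-level CS ioctl: initialize the parser, fill and validate the IB and
 * its buffers, then submit the command stream on the selected ring.
 */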
int radeon_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
{
        struct radeon_device *rdev = dev->dev_private;
        struct radeon_cs_parser parser;
        int r;

        down_read(&rdev->exclusive_lock);
        if (!rdev->accel_working) {
                up_read(&rdev->exclusive_lock);
                return -EBUSY;
        }
        if (rdev->in_reset) {
                up_read(&rdev->exclusive_lock);
                r = radeon_gpu_reset(rdev);
                if (!r)
                        r = -EAGAIN;
                return r;
        }
        /* initialize parser */
        memset(&parser, 0, sizeof(struct radeon_cs_parser));
        parser.filp = filp;
        parser.rdev = rdev;
        parser.dev = rdev->dev;
        parser.family = rdev->family;
        r = radeon_cs_parser_init(&parser, data);
        if (r) {
                DRM_ERROR("Failed to initialize parser !\n");
                radeon_cs_parser_fini(&parser, r, false);
                up_read(&rdev->exclusive_lock);
                r = radeon_cs_handle_lockup(rdev, r);
                return r;
        }

        r = radeon_cs_ib_fill(rdev, &parser);
        if (!r) {
                r = radeon_cs_parser_relocs(&parser);
                if (r && r != -ERESTARTSYS)
                        DRM_ERROR("Failed to parse relocation %d!\n", r);
        }

        if (r) {
                radeon_cs_parser_fini(&parser, r, false);
                up_read(&rdev->exclusive_lock);
                r = radeon_cs_handle_lockup(rdev, r);
                return r;
        }

        trace_radeon_cs(&parser);

        r = radeon_cs_ib_chunk(rdev, &parser);
        if (r) {
                goto out;
        }
        r = radeon_cs_ib_vm_chunk(rdev, &parser);
        if (r) {
                goto out;
        }
out:
        radeon_cs_parser_fini(&parser, r, true);
        up_read(&rdev->exclusive_lock);
        r = radeon_cs_handle_lockup(rdev, r);
        return r;
}

/**
 * radeon_cs_packet_parse() - parse cp packet and point ib index to next packet
 * @p: parser structure holding parsing context.
 * @pkt: where to store packet information
 * @idx: index of the packet's first dword in the IB
 *
 * Assumes that chunk_ib is properly set. Returns -EINVAL if the packet is
 * bigger than the remaining IB size or if the packet type is unknown.
 **/
int radeon_cs_packet_parse(struct radeon_cs_parser *p,
                           struct radeon_cs_packet *pkt,
                           unsigned idx)
{
        struct radeon_cs_chunk *ib_chunk = p->chunk_ib;
        struct radeon_device *rdev = p->rdev;
        uint32_t header;
        int ret = 0, i;

        if (idx >= ib_chunk->length_dw) {
                DRM_ERROR("Can not parse packet at %d after CS end %d !\n",
                          idx, ib_chunk->length_dw);
                return -EINVAL;
        }
        header = radeon_get_ib_value(p, idx);
        pkt->idx = idx;
        pkt->type = RADEON_CP_PACKET_GET_TYPE(header);
        pkt->count = RADEON_CP_PACKET_GET_COUNT(header);
        pkt->one_reg_wr = 0;
        switch (pkt->type) {
        case RADEON_PACKET_TYPE0:
                if (rdev->family < CHIP_R600) {
                        pkt->reg = R100_CP_PACKET0_GET_REG(header);
                        pkt->one_reg_wr =
                                RADEON_CP_PACKET0_GET_ONE_REG_WR(header);
                } else
                        pkt->reg = R600_CP_PACKET0_GET_REG(header);
                break;
        case RADEON_PACKET_TYPE3:
                pkt->opcode = RADEON_CP_PACKET3_GET_OPCODE(header);
                break;
        case RADEON_PACKET_TYPE2:
                pkt->count = -1;
                break;
        default:
                DRM_ERROR("Unknown packet type %d at %d !\n", pkt->type, idx);
                ret = -EINVAL;
                goto dump_ib;
        }
        if ((pkt->count + 1 + pkt->idx) >= ib_chunk->length_dw) {
                DRM_ERROR("Packet (%d:%d:%d) end after CS buffer (%d) !\n",
                          pkt->idx, pkt->type, pkt->count, ib_chunk->length_dw);
                ret = -EINVAL;
                goto dump_ib;
        }
        return 0;

dump_ib:
        for (i = 0; i < ib_chunk->length_dw; i++) {
                if (i == idx)
                        printk("\t0x%08x <---\n", radeon_get_ib_value(p, i));
                else
                        printk("\t0x%08x\n", radeon_get_ib_value(p, i));
        }
        return ret;
}

/**
 * radeon_cs_packet_next_is_pkt3_nop() - test if the next packet is P3 NOP
 * @p: structure holding the parser context.
 *
 * Check if the next packet is a NOP relocation packet3.
 **/
bool radeon_cs_packet_next_is_pkt3_nop(struct radeon_cs_parser *p)
{
        struct radeon_cs_packet p3reloc;
        int r;

        r = radeon_cs_packet_parse(p, &p3reloc, p->idx);
        if (r)
                return false;
        if (p3reloc.type != RADEON_PACKET_TYPE3)
                return false;
        if (p3reloc.opcode != RADEON_PACKET3_NOP)
                return false;
        return true;
}

/**
 * radeon_cs_dump_packet() - dump raw packet context
 * @p: structure holding the parser context.
 * @pkt: structure holding the packet.
 *
 * Used mostly for debugging and error reporting.
 **/
void radeon_cs_dump_packet(struct radeon_cs_parser *p,
                           struct radeon_cs_packet *pkt)
{
        volatile uint32_t *ib;
        unsigned i;
        unsigned idx;

        ib = p->ib.ptr;
        idx = pkt->idx;
        for (i = 0; i <= (pkt->count + 1); i++, idx++)
                DRM_INFO("ib[%d]=0x%08X\n", idx, ib[idx]);
}

/**
 * radeon_cs_packet_next_reloc() - parse next (should be reloc) packet
 * @p: parser structure holding parsing context.
 * @cs_reloc: where to store the resulting relocation entry
 * @nomm: no memory management (legacy UMS path)
 *
 * Check that the next packet is a relocation packet3 and return the buffer
 * list entry (or, for the legacy path, the GPU offset) it refers to.
 **/
int radeon_cs_packet_next_reloc(struct radeon_cs_parser *p,
                                struct radeon_bo_list **cs_reloc,
                                int nomm)
{
        struct radeon_cs_chunk *relocs_chunk;
        struct radeon_cs_packet p3reloc;
        unsigned idx;
        int r;

        if (p->chunk_relocs == NULL) {
                DRM_ERROR("No relocation chunk !\n");
                return -EINVAL;
        }
        *cs_reloc = NULL;
        relocs_chunk = p->chunk_relocs;
        r = radeon_cs_packet_parse(p, &p3reloc, p->idx);
        if (r)
                return r;
        p->idx += p3reloc.count + 2;
        if (p3reloc.type != RADEON_PACKET_TYPE3 ||
            p3reloc.opcode != RADEON_PACKET3_NOP) {
                DRM_ERROR("No packet3 for relocation for packet at %d.\n",
                          p3reloc.idx);
                radeon_cs_dump_packet(p, &p3reloc);
                return -EINVAL;
        }
        idx = radeon_get_ib_value(p, p3reloc.idx + 1);
        if (idx >= relocs_chunk->length_dw) {
                DRM_ERROR("Relocs at %d after relocations chunk end %d !\n",
                          idx, relocs_chunk->length_dw);
                radeon_cs_dump_packet(p, &p3reloc);
                return -EINVAL;
        }
        /* FIXME: we assume reloc size is 4 dwords */
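        /* Legacy (no memory manager) path: take the GPU offset directly from
         * the relocation chunk instead of a validated BO.
         */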
        if (nomm) {
                *cs_reloc = p->relocs;
                (*cs_reloc)->gpu_offset =
                        (u64)relocs_chunk->kdata[idx + 3] << 32;
                (*cs_reloc)->gpu_offset |= relocs_chunk->kdata[idx + 0];
        } else
                *cs_reloc = &p->relocs[(idx / 4)];
        return 0;
}