drm/radeon: initial VCE support v4
[linux-2.6-block.git] / drivers / gpu / drm / radeon / radeon_vce.c
Commit: d93f7937 (author: CK)
1/*
2 * Copyright 2013 Advanced Micro Devices, Inc.
3 * All Rights Reserved.
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the
7 * "Software"), to deal in the Software without restriction, including
8 * without limitation the rights to use, copy, modify, merge, publish,
9 * distribute, sub license, and/or sell copies of the Software, and to
10 * permit persons to whom the Software is furnished to do so, subject to
11 * the following conditions:
12 *
13 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
16 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
17 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
18 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
19 * USE OR OTHER DEALINGS IN THE SOFTWARE.
20 *
21 * The above copyright notice and this permission notice (including the
22 * next paragraph) shall be included in all copies or substantial portions
23 * of the Software.
24 *
25 * Authors: Christian König <christian.koenig@amd.com>
26 */
27
28#include <linux/firmware.h>
29#include <linux/module.h>
30#include <drm/drmP.h>
31#include <drm/drm.h>
32
33#include "radeon.h"
34#include "radeon_asic.h"
35#include "sid.h"
36
37/* Firmware Names */
38#define FIRMWARE_BONAIRE "radeon/BONAIRE_vce.bin"
39
40MODULE_FIRMWARE(FIRMWARE_BONAIRE);
41
42/**
43 * radeon_vce_init - allocate memory, load vce firmware
44 *
45 * @rdev: radeon_device pointer
46 *
47 * First step to get VCE online, allocate memory and load the firmware
48 */
49int radeon_vce_init(struct radeon_device *rdev)
50{
51 unsigned long bo_size;
52 const char *fw_name;
53 int i, r;
54
55 switch (rdev->family) {
56 case CHIP_BONAIRE:
57 case CHIP_KAVERI:
58 case CHIP_KABINI:
59 fw_name = FIRMWARE_BONAIRE;
60 break;
61
62 default:
63 return -EINVAL;
64 }
65
66 r = request_firmware(&rdev->vce_fw, fw_name, rdev->dev);
67 if (r) {
68 dev_err(rdev->dev, "radeon_vce: Can't load firmware \"%s\"\n",
69 fw_name);
70 return r;
71 }
72
73 bo_size = RADEON_GPU_PAGE_ALIGN(rdev->vce_fw->size) +
74 RADEON_VCE_STACK_SIZE + RADEON_VCE_HEAP_SIZE;
75 r = radeon_bo_create(rdev, bo_size, PAGE_SIZE, true,
76 RADEON_GEM_DOMAIN_VRAM, NULL, &rdev->vce.vcpu_bo);
77 if (r) {
78 dev_err(rdev->dev, "(%d) failed to allocate VCE bo\n", r);
79 return r;
80 }
81
82 r = radeon_vce_resume(rdev);
83 if (r)
84 return r;
85
86 memset(rdev->vce.cpu_addr, 0, bo_size);
87 memcpy(rdev->vce.cpu_addr, rdev->vce_fw->data, rdev->vce_fw->size);
88
89 r = radeon_vce_suspend(rdev);
90 if (r)
91 return r;
92
93 for (i = 0; i < RADEON_MAX_VCE_HANDLES; ++i) {
94 atomic_set(&rdev->vce.handles[i], 0);
95 rdev->vce.filp[i] = NULL;
96 }
97
98 return 0;
99}
100
101/**
102 * radeon_vce_fini - free memory
103 *
104 * @rdev: radeon_device pointer
105 *
106 * Last step on VCE teardown, free firmware memory
107 */
108void radeon_vce_fini(struct radeon_device *rdev)
109{
110 radeon_vce_suspend(rdev);
111 radeon_bo_unref(&rdev->vce.vcpu_bo);
112}
113
114/**
115 * radeon_vce_suspend - unpin VCE fw memory
116 *
117 * @rdev: radeon_device pointer
118 *
119 * TODO: Test VCE suspend/resume
120 */
121int radeon_vce_suspend(struct radeon_device *rdev)
122{
123 int r;
124
125 if (rdev->vce.vcpu_bo == NULL)
126 return 0;
127
128 r = radeon_bo_reserve(rdev->vce.vcpu_bo, false);
129 if (!r) {
130 radeon_bo_kunmap(rdev->vce.vcpu_bo);
131 radeon_bo_unpin(rdev->vce.vcpu_bo);
132 radeon_bo_unreserve(rdev->vce.vcpu_bo);
133 }
134 return r;
135}
136
137/**
138 * radeon_vce_resume - pin VCE fw memory
139 *
140 * @rdev: radeon_device pointer
141 *
142 * TODO: Test VCE suspend/resume
143 */
144int radeon_vce_resume(struct radeon_device *rdev)
145{
146 int r;
147
148 if (rdev->vce.vcpu_bo == NULL)
149 return -EINVAL;
150
151 r = radeon_bo_reserve(rdev->vce.vcpu_bo, false);
152 if (r) {
153 radeon_bo_unref(&rdev->vce.vcpu_bo);
154 dev_err(rdev->dev, "(%d) failed to reserve VCE bo\n", r);
155 return r;
156 }
157
158 r = radeon_bo_pin(rdev->vce.vcpu_bo, RADEON_GEM_DOMAIN_VRAM,
159 &rdev->vce.gpu_addr);
160 if (r) {
161 radeon_bo_unreserve(rdev->vce.vcpu_bo);
162 radeon_bo_unref(&rdev->vce.vcpu_bo);
163 dev_err(rdev->dev, "(%d) VCE bo pin failed\n", r);
164 return r;
165 }
166
167 r = radeon_bo_kmap(rdev->vce.vcpu_bo, &rdev->vce.cpu_addr);
168 if (r) {
169 dev_err(rdev->dev, "(%d) VCE map failed\n", r);
170 return r;
171 }
172
173 radeon_bo_unreserve(rdev->vce.vcpu_bo);
174
175 return 0;
176}
177
178/**
179 * radeon_vce_free_handles - free still open VCE handles
180 *
181 * @rdev: radeon_device pointer
182 * @filp: drm file pointer
183 *
184 * Close all VCE handles still open by this file pointer
185 */
186void radeon_vce_free_handles(struct radeon_device *rdev, struct drm_file *filp)
187{
188 int i, r;
189 for (i = 0; i < RADEON_MAX_VCE_HANDLES; ++i) {
190 uint32_t handle = atomic_read(&rdev->vce.handles[i]);
191 if (!handle || rdev->vce.filp[i] != filp)
192 continue;
193
194 r = radeon_vce_get_destroy_msg(rdev, TN_RING_TYPE_VCE1_INDEX,
195 handle, NULL);
196 if (r)
197 DRM_ERROR("Error destroying VCE handle (%d)!\n", r);
198
199 rdev->vce.filp[i] = NULL;
200 atomic_set(&rdev->vce.handles[i], 0);
201 }
202}
203
/**
 * radeon_vce_get_create_msg - generate a VCE create msg
 *
 * @rdev: radeon_device pointer
 * @ring: ring we should submit the msg to
 * @handle: VCE session handle to use
 * @fence: optional fence to return
 *
 * Open up a stream for HW test.  Builds a minimal session-open command
 * stream (session + create + feedback buffer) in an IB and submits it.
 * Returns 0 on success, negative error code otherwise.
 */
int radeon_vce_get_create_msg(struct radeon_device *rdev, int ring,
			      uint32_t handle, struct radeon_fence **fence)
{
	const unsigned ib_size_dw = 1024;
	struct radeon_ib ib;
	uint64_t dummy;
	int i, r;

	r = radeon_ib_get(rdev, ring, &ib, NULL, ib_size_dw * 4);
	if (r) {
		DRM_ERROR("radeon: failed to get ib (%d).\n", r);
		return r;
	}

	/* feedback buffer placed right after the message inside the IB */
	dummy = ib.gpu_addr + 1024;

	/* stitch together an VCE create msg */
	ib.length_dw = 0;
	ib.ptr[ib.length_dw++] = 0x0000000c; /* len */
	ib.ptr[ib.length_dw++] = 0x00000001; /* session cmd */
	ib.ptr[ib.length_dw++] = handle;

	/* create command with fixed dummy stream parameters; the exact
	 * dword values/order are dictated by the VCE firmware interface */
	ib.ptr[ib.length_dw++] = 0x00000030; /* len */
	ib.ptr[ib.length_dw++] = 0x01000001; /* create cmd */
	ib.ptr[ib.length_dw++] = 0x00000000;
	ib.ptr[ib.length_dw++] = 0x00000042;
	ib.ptr[ib.length_dw++] = 0x0000000a;
	ib.ptr[ib.length_dw++] = 0x00000001;
	ib.ptr[ib.length_dw++] = 0x00000080;
	ib.ptr[ib.length_dw++] = 0x00000060;
	ib.ptr[ib.length_dw++] = 0x00000100;
	ib.ptr[ib.length_dw++] = 0x00000100;
	ib.ptr[ib.length_dw++] = 0x0000000c;
	ib.ptr[ib.length_dw++] = 0x00000000;

	/* feedback buffer address, high dword first */
	ib.ptr[ib.length_dw++] = 0x00000014; /* len */
	ib.ptr[ib.length_dw++] = 0x05000005; /* feedback buffer */
	ib.ptr[ib.length_dw++] = upper_32_bits(dummy);
	ib.ptr[ib.length_dw++] = dummy;
	ib.ptr[ib.length_dw++] = 0x00000001;

	/* zero the unused remainder of the IB */
	for (i = ib.length_dw; i < ib_size_dw; ++i)
		ib.ptr[i] = 0x0;

	r = radeon_ib_schedule(rdev, &ib, NULL);
	if (r) {
		DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
	}

	if (fence)
		*fence = radeon_fence_ref(ib.fence);

	radeon_ib_free(rdev, &ib);

	return r;
}
270
/**
 * radeon_vce_get_destroy_msg - generate a VCE destroy msg
 *
 * @rdev: radeon_device pointer
 * @ring: ring we should submit the msg to
 * @handle: VCE session handle to use
 * @fence: optional fence to return
 *
 * Close up a stream for HW test or if userspace failed to do so.
 * Builds a session + feedback buffer + destroy command stream in an IB
 * and submits it.  Returns 0 on success, negative error code otherwise.
 */
int radeon_vce_get_destroy_msg(struct radeon_device *rdev, int ring,
			       uint32_t handle, struct radeon_fence **fence)
{
	const unsigned ib_size_dw = 1024;
	struct radeon_ib ib;
	uint64_t dummy;
	int i, r;

	r = radeon_ib_get(rdev, ring, &ib, NULL, ib_size_dw * 4);
	if (r) {
		DRM_ERROR("radeon: failed to get ib (%d).\n", r);
		return r;
	}

	/* feedback buffer placed right after the message inside the IB */
	dummy = ib.gpu_addr + 1024;

	/* stitch together an VCE destroy msg */
	ib.length_dw = 0;
	ib.ptr[ib.length_dw++] = 0x0000000c; /* len */
	ib.ptr[ib.length_dw++] = 0x00000001; /* session cmd */
	ib.ptr[ib.length_dw++] = handle;

	/* feedback buffer address, high dword first */
	ib.ptr[ib.length_dw++] = 0x00000014; /* len */
	ib.ptr[ib.length_dw++] = 0x05000005; /* feedback buffer */
	ib.ptr[ib.length_dw++] = upper_32_bits(dummy);
	ib.ptr[ib.length_dw++] = dummy;
	ib.ptr[ib.length_dw++] = 0x00000001;

	ib.ptr[ib.length_dw++] = 0x00000008; /* len */
	ib.ptr[ib.length_dw++] = 0x02000001; /* destroy cmd */

	/* zero the unused remainder of the IB */
	for (i = ib.length_dw; i < ib_size_dw; ++i)
		ib.ptr[i] = 0x0;

	r = radeon_ib_schedule(rdev, &ib, NULL);
	if (r) {
		DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
	}

	if (fence)
		*fence = radeon_fence_ref(ib.fence);

	radeon_ib_free(rdev, &ib);

	return r;
}
327
328/**
329 * radeon_vce_cs_reloc - command submission relocation
330 *
331 * @p: parser context
332 * @lo: address of lower dword
333 * @hi: address of higher dword
334 *
335 * Patch relocation inside command stream with real buffer address
336 */
337int radeon_vce_cs_reloc(struct radeon_cs_parser *p, int lo, int hi)
338{
339 struct radeon_cs_chunk *relocs_chunk;
340 uint64_t offset;
341 unsigned idx;
342
343 relocs_chunk = &p->chunks[p->chunk_relocs_idx];
344 offset = radeon_get_ib_value(p, lo);
345 idx = radeon_get_ib_value(p, hi);
346
347 if (idx >= relocs_chunk->length_dw) {
348 DRM_ERROR("Relocs at %d after relocations chunk end %d !\n",
349 idx, relocs_chunk->length_dw);
350 return -EINVAL;
351 }
352
353 offset += p->relocs_ptr[(idx / 4)]->lobj.gpu_offset;
354
355 p->ib.ptr[lo] = offset & 0xFFFFFFFF;
356 p->ib.ptr[hi] = offset >> 32;
357
358 return 0;
359}
360
361/**
362 * radeon_vce_cs_parse - parse and validate the command stream
363 *
364 * @p: parser context
365 *
366 */
367int radeon_vce_cs_parse(struct radeon_cs_parser *p)
368{
369 uint32_t handle = 0;
370 bool destroy = false;
371 int i, r;
372
373 while (p->idx < p->chunks[p->chunk_ib_idx].length_dw) {
374 uint32_t len = radeon_get_ib_value(p, p->idx);
375 uint32_t cmd = radeon_get_ib_value(p, p->idx + 1);
376
377 if ((len < 8) || (len & 3)) {
378 DRM_ERROR("invalid VCE command length (%d)!\n", len);
379 return -EINVAL;
380 }
381
382 switch (cmd) {
383 case 0x00000001: // session
384 handle = radeon_get_ib_value(p, p->idx + 2);
385 break;
386
387 case 0x00000002: // task info
388 case 0x01000001: // create
389 case 0x04000001: // config extension
390 case 0x04000002: // pic control
391 case 0x04000005: // rate control
392 case 0x04000007: // motion estimation
393 case 0x04000008: // rdo
394 break;
395
396 case 0x03000001: // encode
397 r = radeon_vce_cs_reloc(p, p->idx + 10, p->idx + 9);
398 if (r)
399 return r;
400
401 r = radeon_vce_cs_reloc(p, p->idx + 12, p->idx + 11);
402 if (r)
403 return r;
404 break;
405
406 case 0x02000001: // destroy
407 destroy = true;
408 break;
409
410 case 0x05000001: // context buffer
411 case 0x05000004: // video bitstream buffer
412 case 0x05000005: // feedback buffer
413 r = radeon_vce_cs_reloc(p, p->idx + 3, p->idx + 2);
414 if (r)
415 return r;
416 break;
417
418 default:
419 DRM_ERROR("invalid VCE command (0x%x)!\n", cmd);
420 return -EINVAL;
421 }
422
423 p->idx += len / 4;
424 }
425
426 if (destroy) {
427 /* IB contains a destroy msg, free the handle */
428 for (i = 0; i < RADEON_MAX_VCE_HANDLES; ++i)
429 atomic_cmpxchg(&p->rdev->vce.handles[i], handle, 0);
430
431 return 0;
432 }
433
434 /* create or encode, validate the handle */
435 for (i = 0; i < RADEON_MAX_VCE_HANDLES; ++i) {
436 if (atomic_read(&p->rdev->vce.handles[i]) == handle)
437 return 0;
438 }
439
440 /* handle not found try to alloc a new one */
441 for (i = 0; i < RADEON_MAX_VCE_HANDLES; ++i) {
442 if (!atomic_cmpxchg(&p->rdev->vce.handles[i], 0, handle)) {
443 p->rdev->vce.filp[i] = p->filp;
444 return 0;
445 }
446 }
447
448 DRM_ERROR("No more free VCE handles!\n");
449 return -EINVAL;
450}
451
452/**
453 * radeon_vce_semaphore_emit - emit a semaphore command
454 *
455 * @rdev: radeon_device pointer
456 * @ring: engine to use
457 * @semaphore: address of semaphore
458 * @emit_wait: true=emit wait, false=emit signal
459 *
460 */
461bool radeon_vce_semaphore_emit(struct radeon_device *rdev,
462 struct radeon_ring *ring,
463 struct radeon_semaphore *semaphore,
464 bool emit_wait)
465{
466 uint64_t addr = semaphore->gpu_addr;
467
468 radeon_ring_write(ring, VCE_CMD_SEMAPHORE);
469 radeon_ring_write(ring, (addr >> 3) & 0x000FFFFF);
470 radeon_ring_write(ring, (addr >> 23) & 0x000FFFFF);
471 radeon_ring_write(ring, 0x01003000 | (emit_wait ? 1 : 0));
472 if (!emit_wait)
473 radeon_ring_write(ring, VCE_CMD_END);
474
475 return true;
476}
477
/**
 * radeon_vce_ib_execute - execute indirect buffer
 *
 * @rdev: radeon_device pointer
 * @ib: the IB to execute
 *
 * Emit an IB command on the ring: 64 bit GPU address (low dword first)
 * followed by the IB length in dwords.
 */
void radeon_vce_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
{
	struct radeon_ring *ring = &rdev->ring[ib->ring];
	radeon_ring_write(ring, VCE_CMD_IB);
	radeon_ring_write(ring, ib->gpu_addr); /* low 32 bits */
	radeon_ring_write(ring, upper_32_bits(ib->gpu_addr));
	radeon_ring_write(ring, ib->length_dw);
}
493
494/**
495 * radeon_vce_fence_emit - add a fence command to the ring
496 *
497 * @rdev: radeon_device pointer
498 * @fence: the fence
499 *
500 */
501void radeon_vce_fence_emit(struct radeon_device *rdev,
502 struct radeon_fence *fence)
503{
504 struct radeon_ring *ring = &rdev->ring[fence->ring];
505 uint32_t addr = rdev->fence_drv[fence->ring].gpu_addr;
506
507 radeon_ring_write(ring, VCE_CMD_FENCE);
508 radeon_ring_write(ring, addr);
509 radeon_ring_write(ring, upper_32_bits(addr));
510 radeon_ring_write(ring, fence->seq);
511 radeon_ring_write(ring, VCE_CMD_TRAP);
512 radeon_ring_write(ring, VCE_CMD_END);
513}
514
515/**
516 * radeon_vce_ring_test - test if VCE ring is working
517 *
518 * @rdev: radeon_device pointer
519 * @ring: the engine to test on
520 *
521 */
522int radeon_vce_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
523{
524 uint32_t rptr = vce_v1_0_get_rptr(rdev, ring);
525 unsigned i;
526 int r;
527
528 r = radeon_ring_lock(rdev, ring, 16);
529 if (r) {
530 DRM_ERROR("radeon: vce failed to lock ring %d (%d).\n",
531 ring->idx, r);
532 return r;
533 }
534 radeon_ring_write(ring, VCE_CMD_END);
535 radeon_ring_unlock_commit(rdev, ring);
536
537 for (i = 0; i < rdev->usec_timeout; i++) {
538 if (vce_v1_0_get_rptr(rdev, ring) != rptr)
539 break;
540 DRM_UDELAY(1);
541 }
542
543 if (i < rdev->usec_timeout) {
544 DRM_INFO("ring test on %d succeeded in %d usecs\n",
545 ring->idx, i);
546 } else {
547 DRM_ERROR("radeon: ring %d test failed\n",
548 ring->idx);
549 r = -ETIMEDOUT;
550 }
551
552 return r;
553}
554
555/**
556 * radeon_vce_ib_test - test if VCE IBs are working
557 *
558 * @rdev: radeon_device pointer
559 * @ring: the engine to test on
560 *
561 */
562int radeon_vce_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
563{
564 struct radeon_fence *fence = NULL;
565 int r;
566
567 r = radeon_vce_get_create_msg(rdev, ring->idx, 1, NULL);
568 if (r) {
569 DRM_ERROR("radeon: failed to get create msg (%d).\n", r);
570 goto error;
571 }
572
573 r = radeon_vce_get_destroy_msg(rdev, ring->idx, 1, &fence);
574 if (r) {
575 DRM_ERROR("radeon: failed to get destroy ib (%d).\n", r);
576 goto error;
577 }
578
579 r = radeon_fence_wait(fence, false);
580 if (r) {
581 DRM_ERROR("radeon: fence wait failed (%d).\n", r);
582 } else {
583 DRM_INFO("ib test on ring %d succeeded\n", ring->idx);
584 }
585error:
586 radeon_fence_unref(&fence);
587 return r;
588}