drivers/gpu/drm/radeon/radeon_uvd.c
/*
 * Copyright 2011 Advanced Micro Devices, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 */
/*
 * Authors:
 *    Christian König <deathsimple@vodafone.de>
 */
#include <linux/firmware.h>
#include <linux/module.h>
#include <drm/drmP.h>
#include <drm/drm.h>

#include "radeon.h"
#include "r600d.h"

/* 1 second timeout */
#define UVD_IDLE_TIMEOUT_MS	1000

/* Firmware Names */
#define FIRMWARE_R600		"radeon/R600_uvd.bin"
#define FIRMWARE_RS780		"radeon/RS780_uvd.bin"
#define FIRMWARE_RV770		"radeon/RV770_uvd.bin"
#define FIRMWARE_RV710		"radeon/RV710_uvd.bin"
#define FIRMWARE_CYPRESS	"radeon/CYPRESS_uvd.bin"
#define FIRMWARE_SUMO		"radeon/SUMO_uvd.bin"
#define FIRMWARE_TAHITI		"radeon/TAHITI_uvd.bin"
#define FIRMWARE_BONAIRE	"radeon/BONAIRE_uvd.bin"

MODULE_FIRMWARE(FIRMWARE_R600);
MODULE_FIRMWARE(FIRMWARE_RS780);
MODULE_FIRMWARE(FIRMWARE_RV770);
MODULE_FIRMWARE(FIRMWARE_RV710);
MODULE_FIRMWARE(FIRMWARE_CYPRESS);
MODULE_FIRMWARE(FIRMWARE_SUMO);
MODULE_FIRMWARE(FIRMWARE_TAHITI);
MODULE_FIRMWARE(FIRMWARE_BONAIRE);

static void radeon_uvd_idle_work_handler(struct work_struct *work);

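/**
 * radeon_uvd_init - start up UVD
 *
 * @rdev: radeon_device pointer
 *
 * Load the UVD firmware for the current chip family, allocate, pin and
 * map the buffer object that holds the firmware image, stack and heap
 * for the UVD VCPU, and clear the session handle table.
 */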
int radeon_uvd_init(struct radeon_device *rdev)
{
	unsigned long bo_size;
	const char *fw_name;
	int i, r;

	INIT_DELAYED_WORK(&rdev->uvd.idle_work, radeon_uvd_idle_work_handler);

	switch (rdev->family) {
	case CHIP_RV610:
	case CHIP_RV630:
	case CHIP_RV670:
	case CHIP_RV620:
	case CHIP_RV635:
		fw_name = FIRMWARE_R600;
		break;

	case CHIP_RS780:
	case CHIP_RS880:
		fw_name = FIRMWARE_RS780;
		break;

	case CHIP_RV770:
		fw_name = FIRMWARE_RV770;
		break;

	case CHIP_RV710:
	case CHIP_RV730:
	case CHIP_RV740:
		fw_name = FIRMWARE_RV710;
		break;

	case CHIP_CYPRESS:
	case CHIP_HEMLOCK:
	case CHIP_JUNIPER:
	case CHIP_REDWOOD:
	case CHIP_CEDAR:
		fw_name = FIRMWARE_CYPRESS;
		break;

	case CHIP_SUMO:
	case CHIP_SUMO2:
	case CHIP_PALM:
	case CHIP_CAYMAN:
	case CHIP_BARTS:
	case CHIP_TURKS:
	case CHIP_CAICOS:
		fw_name = FIRMWARE_SUMO;
		break;

	case CHIP_TAHITI:
	case CHIP_VERDE:
	case CHIP_PITCAIRN:
	case CHIP_ARUBA:
	case CHIP_OLAND:
		fw_name = FIRMWARE_TAHITI;
		break;

	case CHIP_BONAIRE:
	case CHIP_KABINI:
	case CHIP_KAVERI:
	case CHIP_HAWAII:
	case CHIP_MULLINS:
		fw_name = FIRMWARE_BONAIRE;
		break;

	default:
		return -EINVAL;
	}

	r = request_firmware(&rdev->uvd_fw, fw_name, rdev->dev);
	if (r) {
		dev_err(rdev->dev, "radeon_uvd: Can't load firmware \"%s\"\n",
			fw_name);
		return r;
	}

	bo_size = RADEON_GPU_PAGE_ALIGN(rdev->uvd_fw->size + 8) +
		  RADEON_UVD_STACK_SIZE + RADEON_UVD_HEAP_SIZE;
	r = radeon_bo_create(rdev, bo_size, PAGE_SIZE, true,
			     RADEON_GEM_DOMAIN_VRAM, 0, NULL, &rdev->uvd.vcpu_bo);
	if (r) {
		dev_err(rdev->dev, "(%d) failed to allocate UVD bo\n", r);
		return r;
	}

	r = radeon_bo_reserve(rdev->uvd.vcpu_bo, false);
	if (r) {
		radeon_bo_unref(&rdev->uvd.vcpu_bo);
		dev_err(rdev->dev, "(%d) failed to reserve UVD bo\n", r);
		return r;
	}

	r = radeon_bo_pin(rdev->uvd.vcpu_bo, RADEON_GEM_DOMAIN_VRAM,
			  &rdev->uvd.gpu_addr);
	if (r) {
		radeon_bo_unreserve(rdev->uvd.vcpu_bo);
		radeon_bo_unref(&rdev->uvd.vcpu_bo);
		dev_err(rdev->dev, "(%d) UVD bo pin failed\n", r);
		return r;
	}

	r = radeon_bo_kmap(rdev->uvd.vcpu_bo, &rdev->uvd.cpu_addr);
	if (r) {
		dev_err(rdev->dev, "(%d) UVD map failed\n", r);
		return r;
	}

	radeon_bo_unreserve(rdev->uvd.vcpu_bo);

	for (i = 0; i < RADEON_MAX_UVD_HANDLES; ++i) {
		atomic_set(&rdev->uvd.handles[i], 0);
		rdev->uvd.filp[i] = NULL;
		rdev->uvd.img_size[i] = 0;
	}

	return 0;
}

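/**
 * radeon_uvd_fini - shut down UVD
 *
 * @rdev: radeon_device pointer
 *
 * Unpin and free the UVD VCPU buffer object, tear down the UVD ring
 * and release the firmware image.
 */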
void radeon_uvd_fini(struct radeon_device *rdev)
{
	int r;

	if (rdev->uvd.vcpu_bo == NULL)
		return;

	r = radeon_bo_reserve(rdev->uvd.vcpu_bo, false);
	if (!r) {
		radeon_bo_kunmap(rdev->uvd.vcpu_bo);
		radeon_bo_unpin(rdev->uvd.vcpu_bo);
		radeon_bo_unreserve(rdev->uvd.vcpu_bo);
	}

	radeon_bo_unref(&rdev->uvd.vcpu_bo);

	radeon_ring_fini(rdev, &rdev->ring[R600_RING_TYPE_UVD_INDEX]);

	release_firmware(rdev->uvd_fw);
}

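/**
 * radeon_uvd_suspend - save UVD state before suspend
 *
 * @rdev: radeon_device pointer
 *
 * If any sessions are still open, save the part of the VCPU buffer
 * object that follows the firmware image so it can be restored on resume.
 */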
int radeon_uvd_suspend(struct radeon_device *rdev)
{
	unsigned size;
	void *ptr;
	int i;

	if (rdev->uvd.vcpu_bo == NULL)
		return 0;

	for (i = 0; i < RADEON_MAX_UVD_HANDLES; ++i)
		if (atomic_read(&rdev->uvd.handles[i]))
			break;

	if (i == RADEON_MAX_UVD_HANDLES)
		return 0;

	size = radeon_bo_size(rdev->uvd.vcpu_bo);
	size -= rdev->uvd_fw->size;

	ptr = rdev->uvd.cpu_addr;
	ptr += rdev->uvd_fw->size;

	rdev->uvd.saved_bo = kmalloc(size, GFP_KERNEL);
	memcpy(rdev->uvd.saved_bo, ptr, size);

	return 0;
}

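/**
 * radeon_uvd_resume - restore UVD state after resume
 *
 * @rdev: radeon_device pointer
 *
 * Re-upload the firmware image and either restore the state saved at
 * suspend time or clear the rest of the VCPU buffer object.
 */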
int radeon_uvd_resume(struct radeon_device *rdev)
{
	unsigned size;
	void *ptr;

	if (rdev->uvd.vcpu_bo == NULL)
		return -EINVAL;

	memcpy(rdev->uvd.cpu_addr, rdev->uvd_fw->data, rdev->uvd_fw->size);

	size = radeon_bo_size(rdev->uvd.vcpu_bo);
	size -= rdev->uvd_fw->size;

	ptr = rdev->uvd.cpu_addr;
	ptr += rdev->uvd_fw->size;

	if (rdev->uvd.saved_bo != NULL) {
		memcpy(ptr, rdev->uvd.saved_bo, size);
		kfree(rdev->uvd.saved_bo);
		rdev->uvd.saved_bo = NULL;
	} else
		memset(ptr, 0, size);

	return 0;
}

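/**
 * radeon_uvd_force_into_uvd_segment - restrict a BO to the UVD segment
 *
 * @rbo: buffer object
 *
 * Limit every placement of the buffer object to the first 256MB,
 * matching the 256MB segment checks enforced by the UVD command
 * stream parser.
 */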
void radeon_uvd_force_into_uvd_segment(struct radeon_bo *rbo)
{
	int i;

	for (i = 0; i < rbo->placement.num_placement; ++i) {
		rbo->placements[i].fpfn = 0 >> PAGE_SHIFT;
		rbo->placements[i].lpfn = (256 * 1024 * 1024) >> PAGE_SHIFT;
	}
}

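/**
 * radeon_uvd_free_handles - destroy all sessions owned by a file
 *
 * @rdev: radeon_device pointer
 * @filp: file handle being closed
 *
 * Send a destroy message for every handle still registered to @filp and
 * wait for the resulting fences before releasing the handle slots.
 */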
void radeon_uvd_free_handles(struct radeon_device *rdev, struct drm_file *filp)
{
	int i, r;
	for (i = 0; i < RADEON_MAX_UVD_HANDLES; ++i) {
		uint32_t handle = atomic_read(&rdev->uvd.handles[i]);
		if (handle != 0 && rdev->uvd.filp[i] == filp) {
			struct radeon_fence *fence;

			radeon_uvd_note_usage(rdev);

			r = radeon_uvd_get_destroy_msg(rdev,
				R600_RING_TYPE_UVD_INDEX, handle, &fence);
			if (r) {
				DRM_ERROR("Error destroying UVD (%d)!\n", r);
				continue;
			}

			radeon_fence_wait(fence, false);
			radeon_fence_unref(&fence);

			rdev->uvd.filp[i] = NULL;
			atomic_set(&rdev->uvd.handles[i], 0);
		}
	}
}

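/**
 * radeon_uvd_cs_msg_decode - calculate buffer sizes for a decode message
 *
 * @msg: pointer to the mapped UVD message
 * @buf_sizes: minimum buffer sizes to fill in
 *
 * Peek into a decode message to work out the minimum DPB and image
 * buffer sizes for the given codec, resolution and pitch.
 */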
static int radeon_uvd_cs_msg_decode(uint32_t *msg, unsigned buf_sizes[])
{
	unsigned stream_type = msg[4];
	unsigned width = msg[6];
	unsigned height = msg[7];
	unsigned dpb_size = msg[9];
	unsigned pitch = msg[28];

	unsigned width_in_mb = width / 16;
	unsigned height_in_mb = ALIGN(height / 16, 2);

	unsigned image_size, tmp, min_dpb_size;

	image_size = width * height;
	image_size += image_size / 2;
	image_size = ALIGN(image_size, 1024);

	switch (stream_type) {
	case 0: /* H264 */

		/* reference picture buffer */
		min_dpb_size = image_size * 17;

		/* macroblock context buffer */
		min_dpb_size += width_in_mb * height_in_mb * 17 * 192;

		/* IT surface buffer */
		min_dpb_size += width_in_mb * height_in_mb * 32;
		break;

	case 1: /* VC1 */

		/* reference picture buffer */
		min_dpb_size = image_size * 3;

		/* CONTEXT_BUFFER */
		min_dpb_size += width_in_mb * height_in_mb * 128;

		/* IT surface buffer */
		min_dpb_size += width_in_mb * 64;

		/* DB surface buffer */
		min_dpb_size += width_in_mb * 128;

		/* BP */
		tmp = max(width_in_mb, height_in_mb);
		min_dpb_size += ALIGN(tmp * 7 * 16, 64);
		break;

	case 3: /* MPEG2 */

		/* reference picture buffer */
		min_dpb_size = image_size * 3;
		break;

	case 4: /* MPEG4 */

		/* reference picture buffer */
		min_dpb_size = image_size * 3;

		/* CM */
		min_dpb_size += width_in_mb * height_in_mb * 64;

		/* IT surface buffer */
		min_dpb_size += ALIGN(width_in_mb * height_in_mb * 32, 64);
		break;

	default:
		DRM_ERROR("UVD codec not handled %d!\n", stream_type);
		return -EINVAL;
	}

	if (width > pitch) {
		DRM_ERROR("Invalid UVD decoding target pitch!\n");
		return -EINVAL;
	}

	if (dpb_size < min_dpb_size) {
		DRM_ERROR("Invalid dpb_size in UVD message (%d / %d)!\n",
			  dpb_size, min_dpb_size);
		return -EINVAL;
	}

	buf_sizes[0x1] = dpb_size;
	buf_sizes[0x2] = image_size;
	return 0;
}

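/**
 * radeon_uvd_cs_msg - validate a UVD message
 *
 * @p: parser context
 * @bo: buffer object containing the message
 * @offset: offset of the message inside the BO
 * @buf_sizes: minimum buffer sizes, filled in for decode messages
 *
 * Map the message, check its type and handle, and register or release
 * the session handle for create and destroy messages.
 */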
static int radeon_uvd_cs_msg(struct radeon_cs_parser *p, struct radeon_bo *bo,
			     unsigned offset, unsigned buf_sizes[])
{
	int32_t *msg, msg_type, handle;
	unsigned img_size = 0;
	void *ptr;

	int i, r;

	if (offset & 0x3F) {
		DRM_ERROR("UVD messages must be 64 byte aligned!\n");
		return -EINVAL;
	}

	if (bo->tbo.sync_obj) {
		r = radeon_fence_wait(bo->tbo.sync_obj, false);
		if (r) {
			DRM_ERROR("Failed waiting for UVD message (%d)!\n", r);
			return r;
		}
	}

	r = radeon_bo_kmap(bo, &ptr);
	if (r) {
		DRM_ERROR("Failed mapping the UVD message (%d)!\n", r);
		return r;
	}

	msg = ptr + offset;

	msg_type = msg[1];
	handle = msg[2];

	if (handle == 0) {
		DRM_ERROR("Invalid UVD handle!\n");
		return -EINVAL;
	}

	if (msg_type == 1) {
		/* it's a decode msg, calc buffer sizes */
		r = radeon_uvd_cs_msg_decode(msg, buf_sizes);
		/* calc image size (width * height) */
		img_size = msg[6] * msg[7];
		radeon_bo_kunmap(bo);
		if (r)
			return r;

	} else if (msg_type == 2) {
		/* it's a destroy msg, free the handle */
		for (i = 0; i < RADEON_MAX_UVD_HANDLES; ++i)
			atomic_cmpxchg(&p->rdev->uvd.handles[i], handle, 0);
		radeon_bo_kunmap(bo);
		return 0;
	} else {
		/* it's a create msg, calc image size (width * height) */
		img_size = msg[7] * msg[8];
		radeon_bo_kunmap(bo);

		if (msg_type != 0) {
			DRM_ERROR("Illegal UVD message type (%d)!\n", msg_type);
			return -EINVAL;
		}

		/* it's a create msg, no special handling needed */
	}

	/* create or decode, validate the handle */
	for (i = 0; i < RADEON_MAX_UVD_HANDLES; ++i) {
		if (atomic_read(&p->rdev->uvd.handles[i]) == handle)
			return 0;
	}

	/* handle not found, try to alloc a new one */
	for (i = 0; i < RADEON_MAX_UVD_HANDLES; ++i) {
		if (!atomic_cmpxchg(&p->rdev->uvd.handles[i], 0, handle)) {
			p->rdev->uvd.filp[i] = p->filp;
			p->rdev->uvd.img_size[i] = img_size;
			return 0;
		}
	}

	DRM_ERROR("No more free UVD handles!\n");
	return -EINVAL;
}

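/**
 * radeon_uvd_cs_reloc - patch and check a relocation
 *
 * @p: parser context
 * @data0: index of the lower address dword in the IB
 * @data1: index of the upper address dword in the IB
 * @buf_sizes: minimum buffer sizes expected for each command
 * @has_msg_cmd: set once a message command has been seen
 *
 * Patch the buffer address into the IB and check the buffer size as
 * well as the 256MB segment restrictions of the UVD hardware.
 */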
static int radeon_uvd_cs_reloc(struct radeon_cs_parser *p,
			       int data0, int data1,
			       unsigned buf_sizes[], bool *has_msg_cmd)
{
	struct radeon_cs_chunk *relocs_chunk;
	struct radeon_cs_reloc *reloc;
	unsigned idx, cmd, offset;
	uint64_t start, end;
	int r;

	relocs_chunk = &p->chunks[p->chunk_relocs_idx];
	offset = radeon_get_ib_value(p, data0);
	idx = radeon_get_ib_value(p, data1);
	if (idx >= relocs_chunk->length_dw) {
		DRM_ERROR("Relocs at %d after relocations chunk end %d !\n",
			  idx, relocs_chunk->length_dw);
		return -EINVAL;
	}

	reloc = p->relocs_ptr[(idx / 4)];
	start = reloc->gpu_offset;
	end = start + radeon_bo_size(reloc->robj);
	start += offset;

	p->ib.ptr[data0] = start & 0xFFFFFFFF;
	p->ib.ptr[data1] = start >> 32;

	cmd = radeon_get_ib_value(p, p->idx) >> 1;

	if (cmd < 0x4) {
		if (end <= start) {
			DRM_ERROR("invalid reloc offset %X!\n", offset);
			return -EINVAL;
		}
		if ((end - start) < buf_sizes[cmd]) {
			DRM_ERROR("buffer (%d) too small (%d / %d)!\n", cmd,
				  (unsigned)(end - start), buf_sizes[cmd]);
			return -EINVAL;
		}

	} else if (cmd != 0x100) {
		DRM_ERROR("invalid UVD command %X!\n", cmd);
		return -EINVAL;
	}

	if ((start >> 28) != ((end - 1) >> 28)) {
		DRM_ERROR("reloc %LX-%LX crossing 256MB boundary!\n",
			  start, end);
		return -EINVAL;
	}

	/* TODO: is this still necessary on NI+ ? */
	if ((cmd == 0 || cmd == 0x3) &&
	    (start >> 28) != (p->rdev->uvd.gpu_addr >> 28)) {
		DRM_ERROR("msg/fb buffer %LX-%LX out of 256MB segment!\n",
			  start, end);
		return -EINVAL;
	}

	if (cmd == 0) {
		if (*has_msg_cmd) {
			DRM_ERROR("More than one message in a UVD-IB!\n");
			return -EINVAL;
		}
		*has_msg_cmd = true;
		r = radeon_uvd_cs_msg(p, reloc->robj, offset, buf_sizes);
		if (r)
			return r;
	} else if (!*has_msg_cmd) {
		DRM_ERROR("Message needed before other commands are sent!\n");
		return -EINVAL;
	}

	return 0;
}

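/**
 * radeon_uvd_cs_reg - parse the register writes of a type-0 packet
 *
 * @p: parser context
 * @pkt: packet to parse
 * @data0: index of the lower address dword, updated while parsing
 * @data1: index of the upper address dword, updated while parsing
 * @buf_sizes: minimum buffer sizes expected for each command
 * @has_msg_cmd: set once a message command has been seen
 *
 * Track the DATA0/DATA1 address registers and validate the relocation
 * whenever the VCPU command register is written.
 */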
static int radeon_uvd_cs_reg(struct radeon_cs_parser *p,
			     struct radeon_cs_packet *pkt,
			     int *data0, int *data1,
			     unsigned buf_sizes[],
			     bool *has_msg_cmd)
{
	int i, r;

	p->idx++;
	for (i = 0; i <= pkt->count; ++i) {
		switch (pkt->reg + i*4) {
		case UVD_GPCOM_VCPU_DATA0:
			*data0 = p->idx;
			break;
		case UVD_GPCOM_VCPU_DATA1:
			*data1 = p->idx;
			break;
		case UVD_GPCOM_VCPU_CMD:
			r = radeon_uvd_cs_reloc(p, *data0, *data1,
						buf_sizes, has_msg_cmd);
			if (r)
				return r;
			break;
		case UVD_ENGINE_CNTL:
			break;
		default:
			DRM_ERROR("Invalid reg 0x%X!\n",
				  pkt->reg + i*4);
			return -EINVAL;
		}
		p->idx++;
	}
	return 0;
}

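/**
 * radeon_uvd_cs_parse - validate a UVD command stream
 *
 * @p: parser context
 *
 * Walk all packets in the IB, enforce the alignment and relocation
 * chunk requirements, and make sure the IB contains exactly one
 * message command.
 */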
int radeon_uvd_cs_parse(struct radeon_cs_parser *p)
{
	struct radeon_cs_packet pkt;
	int r, data0 = 0, data1 = 0;

	/* does the IB have a msg command */
	bool has_msg_cmd = false;

	/* minimum buffer sizes */
	unsigned buf_sizes[] = {
		[0x00000000] = 2048,
		[0x00000001] = 32 * 1024 * 1024,
		[0x00000002] = 2048 * 1152 * 3,
		[0x00000003] = 2048,
	};

	if (p->chunks[p->chunk_ib_idx].length_dw % 16) {
		DRM_ERROR("UVD IB length (%d) not 16 dwords aligned!\n",
			  p->chunks[p->chunk_ib_idx].length_dw);
		return -EINVAL;
	}

	if (p->chunk_relocs_idx == -1) {
		DRM_ERROR("No relocation chunk !\n");
		return -EINVAL;
	}

	do {
		r = radeon_cs_packet_parse(p, &pkt, p->idx);
		if (r)
			return r;
		switch (pkt.type) {
		case RADEON_PACKET_TYPE0:
			r = radeon_uvd_cs_reg(p, &pkt, &data0, &data1,
					      buf_sizes, &has_msg_cmd);
			if (r)
				return r;
			break;
		case RADEON_PACKET_TYPE2:
			p->idx += pkt.count + 2;
			break;
		default:
			DRM_ERROR("Unknown packet type %d !\n", pkt.type);
			return -EINVAL;
		}
	} while (p->idx < p->chunks[p->chunk_ib_idx].length_dw);

	if (!has_msg_cmd) {
		DRM_ERROR("UVD-IBs need a msg command!\n");
		return -EINVAL;
	}

	return 0;
}

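/**
 * radeon_uvd_send_msg - submit a UVD message to the hardware
 *
 * @rdev: radeon_device pointer
 * @ring: ring index to use
 * @bo: buffer object containing the message
 * @fence: resulting fence, optional
 *
 * Move the message buffer into VRAM inside the UVD segment, build a
 * small IB that points the VCPU at it and schedule the IB on the
 * given ring.
 */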
static int radeon_uvd_send_msg(struct radeon_device *rdev,
			       int ring, struct radeon_bo *bo,
			       struct radeon_fence **fence)
{
	struct ttm_validate_buffer tv;
	struct ww_acquire_ctx ticket;
	struct list_head head;
	struct radeon_ib ib;
	uint64_t addr;
	int i, r;

	memset(&tv, 0, sizeof(tv));
	tv.bo = &bo->tbo;

	INIT_LIST_HEAD(&head);
	list_add(&tv.head, &head);

	r = ttm_eu_reserve_buffers(&ticket, &head);
	if (r)
		return r;

	radeon_ttm_placement_from_domain(bo, RADEON_GEM_DOMAIN_VRAM);
	radeon_uvd_force_into_uvd_segment(bo);

	r = ttm_bo_validate(&bo->tbo, &bo->placement, true, false);
	if (r)
		goto err;

	r = radeon_ib_get(rdev, ring, &ib, NULL, 64);
	if (r)
		goto err;

	addr = radeon_bo_gpu_offset(bo);
	ib.ptr[0] = PACKET0(UVD_GPCOM_VCPU_DATA0, 0);
	ib.ptr[1] = addr;
	ib.ptr[2] = PACKET0(UVD_GPCOM_VCPU_DATA1, 0);
	ib.ptr[3] = addr >> 32;
	ib.ptr[4] = PACKET0(UVD_GPCOM_VCPU_CMD, 0);
	ib.ptr[5] = 0;
	for (i = 6; i < 16; ++i)
		ib.ptr[i] = PACKET2(0);
	ib.length_dw = 16;

	r = radeon_ib_schedule(rdev, &ib, NULL, false);
	if (r)
		goto err;
	ttm_eu_fence_buffer_objects(&ticket, &head, ib.fence);

	if (fence)
		*fence = radeon_fence_ref(ib.fence);

	radeon_ib_free(rdev, &ib);
	radeon_bo_unref(&bo);
	return 0;

err:
	ttm_eu_backoff_reservation(&ticket, &head);
	return r;
}

/* multiple fence commands without any stream commands in between can
   crash the vcpu so just try to emit a dummy create/destroy msg to
   avoid this */
int radeon_uvd_get_create_msg(struct radeon_device *rdev, int ring,
			      uint32_t handle, struct radeon_fence **fence)
{
	struct radeon_bo *bo;
	uint32_t *msg;
	int r, i;

	r = radeon_bo_create(rdev, 1024, PAGE_SIZE, true,
			     RADEON_GEM_DOMAIN_VRAM, 0, NULL, &bo);
	if (r)
		return r;

	r = radeon_bo_reserve(bo, false);
	if (r) {
		radeon_bo_unref(&bo);
		return r;
	}

	r = radeon_bo_kmap(bo, (void **)&msg);
	if (r) {
		radeon_bo_unreserve(bo);
		radeon_bo_unref(&bo);
		return r;
	}

	/* stitch together an UVD create msg */
	msg[0] = cpu_to_le32(0x00000de4);
	msg[1] = cpu_to_le32(0x00000000);
	msg[2] = cpu_to_le32(handle);
	msg[3] = cpu_to_le32(0x00000000);
	msg[4] = cpu_to_le32(0x00000000);
	msg[5] = cpu_to_le32(0x00000000);
	msg[6] = cpu_to_le32(0x00000000);
	msg[7] = cpu_to_le32(0x00000780);
	msg[8] = cpu_to_le32(0x00000440);
	msg[9] = cpu_to_le32(0x00000000);
	msg[10] = cpu_to_le32(0x01b37000);
	for (i = 11; i < 1024; ++i)
		msg[i] = cpu_to_le32(0x0);

	radeon_bo_kunmap(bo);
	radeon_bo_unreserve(bo);

	return radeon_uvd_send_msg(rdev, ring, bo, fence);
}

int radeon_uvd_get_destroy_msg(struct radeon_device *rdev, int ring,
			       uint32_t handle, struct radeon_fence **fence)
{
	struct radeon_bo *bo;
	uint32_t *msg;
	int r, i;

	r = radeon_bo_create(rdev, 1024, PAGE_SIZE, true,
			     RADEON_GEM_DOMAIN_VRAM, 0, NULL, &bo);
	if (r)
		return r;

	r = radeon_bo_reserve(bo, false);
	if (r) {
		radeon_bo_unref(&bo);
		return r;
	}

	r = radeon_bo_kmap(bo, (void **)&msg);
	if (r) {
		radeon_bo_unreserve(bo);
		radeon_bo_unref(&bo);
		return r;
	}

	/* stitch together an UVD destroy msg */
	msg[0] = cpu_to_le32(0x00000de4);
	msg[1] = cpu_to_le32(0x00000002);
	msg[2] = cpu_to_le32(handle);
	msg[3] = cpu_to_le32(0x00000000);
	for (i = 4; i < 1024; ++i)
		msg[i] = cpu_to_le32(0x0);

	radeon_bo_kunmap(bo);
	radeon_bo_unreserve(bo);

	return radeon_uvd_send_msg(rdev, ring, bo, fence);
}

/**
 * radeon_uvd_count_handles - count number of open streams
 *
 * @rdev: radeon_device pointer
 * @sd: number of SD streams
 * @hd: number of HD streams
 *
 * Count the number of open SD/HD streams as a hint for power management
 */
static void radeon_uvd_count_handles(struct radeon_device *rdev,
				     unsigned *sd, unsigned *hd)
{
	unsigned i;

	*sd = 0;
	*hd = 0;

	for (i = 0; i < RADEON_MAX_UVD_HANDLES; ++i) {
		if (!atomic_read(&rdev->uvd.handles[i]))
			continue;

		if (rdev->uvd.img_size[i] >= 720*576)
			++(*hd);
		else
			++(*sd);
	}
}

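/**
 * radeon_uvd_idle_work_handler - power down UVD when it is idle
 *
 * @work: delayed work item
 *
 * Once no more fences are outstanding on the UVD ring, either tell DPM
 * to power down the block or drop the UVD clocks to zero; otherwise
 * re-arm the handler.
 */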
static void radeon_uvd_idle_work_handler(struct work_struct *work)
{
	struct radeon_device *rdev =
		container_of(work, struct radeon_device, uvd.idle_work.work);

	if (radeon_fence_count_emitted(rdev, R600_RING_TYPE_UVD_INDEX) == 0) {
		if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) {
			radeon_uvd_count_handles(rdev, &rdev->pm.dpm.sd,
						 &rdev->pm.dpm.hd);
			radeon_dpm_enable_uvd(rdev, false);
		} else {
			radeon_set_uvd_clocks(rdev, 0, 0);
		}
	} else {
		schedule_delayed_work(&rdev->uvd.idle_work,
				      msecs_to_jiffies(UVD_IDLE_TIMEOUT_MS));
	}
}

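/**
 * radeon_uvd_note_usage - bump clocks while UVD is in use
 *
 * @rdev: radeon_device pointer
 *
 * (Re)arm the idle work handler and, if UVD was idle, raise the UVD
 * clocks or ask DPM to power the block up again.
 */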
void radeon_uvd_note_usage(struct radeon_device *rdev)
{
	bool streams_changed = false;
	bool set_clocks = !cancel_delayed_work_sync(&rdev->uvd.idle_work);
	set_clocks &= schedule_delayed_work(&rdev->uvd.idle_work,
					    msecs_to_jiffies(UVD_IDLE_TIMEOUT_MS));

	if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) {
		unsigned hd = 0, sd = 0;
		radeon_uvd_count_handles(rdev, &sd, &hd);
		if ((rdev->pm.dpm.sd != sd) ||
		    (rdev->pm.dpm.hd != hd)) {
			rdev->pm.dpm.sd = sd;
			rdev->pm.dpm.hd = hd;
			/* disable this for now */
			/*streams_changed = true;*/
		}
	}

	if (set_clocks || streams_changed) {
		if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) {
			radeon_dpm_enable_uvd(rdev, true);
		} else {
			radeon_set_uvd_clocks(rdev, 53300, 40000);
		}
	}
}

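/**
 * radeon_uvd_calc_upll_post_div - calculate a UPLL post divider
 *
 * @vco_freq: VCO frequency
 * @target_freq: frequency the post divider should produce
 * @pd_min: minimum allowed post divider
 * @pd_even: post dividers above this value must be even
 *
 * Pick the smallest post divider that satisfies the hardware limits
 * and never produces a frequency above the target.
 */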
static unsigned radeon_uvd_calc_upll_post_div(unsigned vco_freq,
					      unsigned target_freq,
					      unsigned pd_min,
					      unsigned pd_even)
{
	unsigned post_div = vco_freq / target_freq;

	/* adjust to post divider minimum value */
	if (post_div < pd_min)
		post_div = pd_min;

	/* we always need a frequency less than or equal the target */
	if ((vco_freq / post_div) > target_freq)
		post_div += 1;

	/* post dividers above a certain value must be even */
	if (post_div > pd_even && post_div % 2)
		post_div += 1;

	return post_div;
}

/**
 * radeon_uvd_calc_upll_dividers - calc UPLL clock dividers
 *
 * @rdev: radeon_device pointer
 * @vclk: wanted VCLK
 * @dclk: wanted DCLK
 * @vco_min: minimum VCO frequency
 * @vco_max: maximum VCO frequency
 * @fb_factor: factor to multiply vco freq with
 * @fb_mask: limit and bitmask for feedback divider
 * @pd_min: post divider minimum
 * @pd_max: post divider maximum
 * @pd_even: post divider must be even above this value
 * @optimal_fb_div: resulting feedback divider
 * @optimal_vclk_div: resulting vclk post divider
 * @optimal_dclk_div: resulting dclk post divider
 *
 * Calculate dividers for UVDs UPLL (R6xx-SI, except APUs).
 * Returns zero on success, -EINVAL on error.
 */
int radeon_uvd_calc_upll_dividers(struct radeon_device *rdev,
				  unsigned vclk, unsigned dclk,
				  unsigned vco_min, unsigned vco_max,
				  unsigned fb_factor, unsigned fb_mask,
				  unsigned pd_min, unsigned pd_max,
				  unsigned pd_even,
				  unsigned *optimal_fb_div,
				  unsigned *optimal_vclk_div,
				  unsigned *optimal_dclk_div)
{
	unsigned vco_freq, ref_freq = rdev->clock.spll.reference_freq;

	/* start off with something large */
	unsigned optimal_score = ~0;

	/* loop through vco from low to high */
	vco_min = max(max(vco_min, vclk), dclk);
	for (vco_freq = vco_min; vco_freq <= vco_max; vco_freq += 100) {

		uint64_t fb_div = (uint64_t)vco_freq * fb_factor;
		unsigned vclk_div, dclk_div, score;

		do_div(fb_div, ref_freq);

		/* fb div out of range ? */
		if (fb_div > fb_mask)
			break; /* it can only get worse */

		fb_div &= fb_mask;

		/* calc vclk divider with current vco freq */
		vclk_div = radeon_uvd_calc_upll_post_div(vco_freq, vclk,
							 pd_min, pd_even);
		if (vclk_div > pd_max)
			break; /* vco is too big, it has to stop */

		/* calc dclk divider with current vco freq */
		dclk_div = radeon_uvd_calc_upll_post_div(vco_freq, dclk,
							 pd_min, pd_even);
		if (dclk_div > pd_max)
			break; /* vco is too big, it has to stop */

		/* calc score with current vco freq */
		score = vclk - (vco_freq / vclk_div) + dclk - (vco_freq / dclk_div);

		/* determine if this vco setting is better than current optimal settings */
		if (score < optimal_score) {
			*optimal_fb_div = fb_div;
			*optimal_vclk_div = vclk_div;
			*optimal_dclk_div = dclk_div;
			optimal_score = score;
			if (optimal_score == 0)
				break; /* it can't get better than this */
		}
	}

	/* did we find a valid setup ? */
	if (optimal_score == ~0)
		return -EINVAL;

	return 0;
}

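/**
 * radeon_uvd_send_upll_ctlreq - emit UPLL control request and wait for ack
 *
 * @rdev: radeon_device pointer
 * @cg_upll_func_cntl: register offset of the UPLL control register
 *
 * Toggle UPLL_CTLREQ and poll for CTLACK/CTLACK2, returning -ETIMEDOUT
 * if the PLL does not acknowledge the request.
 */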
int radeon_uvd_send_upll_ctlreq(struct radeon_device *rdev,
				unsigned cg_upll_func_cntl)
{
	unsigned i;

	/* make sure UPLL_CTLREQ is deasserted */
	WREG32_P(cg_upll_func_cntl, 0, ~UPLL_CTLREQ_MASK);

	mdelay(10);

	/* assert UPLL_CTLREQ */
	WREG32_P(cg_upll_func_cntl, UPLL_CTLREQ_MASK, ~UPLL_CTLREQ_MASK);

	/* wait for CTLACK and CTLACK2 to get asserted */
	for (i = 0; i < 100; ++i) {
		uint32_t mask = UPLL_CTLACK_MASK | UPLL_CTLACK2_MASK;
		if ((RREG32(cg_upll_func_cntl) & mask) == mask)
			break;
		mdelay(10);
	}

	/* deassert UPLL_CTLREQ */
	WREG32_P(cg_upll_func_cntl, 0, ~UPLL_CTLREQ_MASK);

	if (i == 100) {
		DRM_ERROR("Timeout setting UVD clocks!\n");
		return -ETIMEDOUT;
	}

	return 0;
}