Commit | Line | Data |
---|---|---|
d38ceaf9 AD |
1 | /* |
2 | * Copyright 2013 Advanced Micro Devices, Inc. | |
3 | * All Rights Reserved. | |
4 | * | |
5 | * Permission is hereby granted, free of charge, to any person obtaining a | |
6 | * copy of this software and associated documentation files (the | |
7 | * "Software"), to deal in the Software without restriction, including | |
8 | * without limitation the rights to use, copy, modify, merge, publish, | |
9 | * distribute, sub license, and/or sell copies of the Software, and to | |
10 | * permit persons to whom the Software is furnished to do so, subject to | |
11 | * the following conditions: | |
12 | * | |
13 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |
14 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |
15 | * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL | |
16 | * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, | |
17 | * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR | |
18 | * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE | |
19 | * USE OR OTHER DEALINGS IN THE SOFTWARE. | |
20 | * | |
21 | * The above copyright notice and this permission notice (including the | |
22 | * next paragraph) shall be included in all copies or substantial portions | |
23 | * of the Software. | |
24 | * | |
25 | * Authors: Christian König <christian.koenig@amd.com> | |
26 | */ | |
27 | ||
28 | #include <linux/firmware.h> | |
29 | #include <linux/module.h> | |
30 | #include <drm/drmP.h> | |
31 | #include <drm/drm.h> | |
32 | ||
33 | #include "amdgpu.h" | |
34 | #include "amdgpu_pm.h" | |
35 | #include "amdgpu_vce.h" | |
36 | #include "cikd.h" | |
37 | ||
38 | /* 1 second timeout */ | |
182830a1 | 39 | #define VCE_IDLE_TIMEOUT msecs_to_jiffies(1000) |
d38ceaf9 AD |
40 | |
41 | /* Firmware Names */ | |
42 | #ifdef CONFIG_DRM_AMDGPU_CIK | |
43 | #define FIRMWARE_BONAIRE "radeon/bonaire_vce.bin" | |
edf600da CK |
44 | #define FIRMWARE_KABINI "radeon/kabini_vce.bin" |
45 | #define FIRMWARE_KAVERI "radeon/kaveri_vce.bin" | |
46 | #define FIRMWARE_HAWAII "radeon/hawaii_vce.bin" | |
d38ceaf9 AD |
47 | #define FIRMWARE_MULLINS "radeon/mullins_vce.bin" |
48 | #endif | |
c65444fe JZ |
49 | #define FIRMWARE_TONGA "amdgpu/tonga_vce.bin" |
50 | #define FIRMWARE_CARRIZO "amdgpu/carrizo_vce.bin" | |
188a9bcd | 51 | #define FIRMWARE_FIJI "amdgpu/fiji_vce.bin" |
cfaba566 | 52 | #define FIRMWARE_STONEY "amdgpu/stoney_vce.bin" |
2cc0c0b5 FC |
53 | #define FIRMWARE_POLARIS10 "amdgpu/polaris10_vce.bin" |
54 | #define FIRMWARE_POLARIS11 "amdgpu/polaris11_vce.bin" | |
d38ceaf9 AD |
55 | |
56 | #ifdef CONFIG_DRM_AMDGPU_CIK | |
57 | MODULE_FIRMWARE(FIRMWARE_BONAIRE); | |
58 | MODULE_FIRMWARE(FIRMWARE_KABINI); | |
59 | MODULE_FIRMWARE(FIRMWARE_KAVERI); | |
60 | MODULE_FIRMWARE(FIRMWARE_HAWAII); | |
61 | MODULE_FIRMWARE(FIRMWARE_MULLINS); | |
62 | #endif | |
63 | MODULE_FIRMWARE(FIRMWARE_TONGA); | |
64 | MODULE_FIRMWARE(FIRMWARE_CARRIZO); | |
188a9bcd | 65 | MODULE_FIRMWARE(FIRMWARE_FIJI); |
cfaba566 | 66 | MODULE_FIRMWARE(FIRMWARE_STONEY); |
2cc0c0b5 FC |
67 | MODULE_FIRMWARE(FIRMWARE_POLARIS10); |
68 | MODULE_FIRMWARE(FIRMWARE_POLARIS11); | |
d38ceaf9 AD |
69 | |
70 | static void amdgpu_vce_idle_work_handler(struct work_struct *work); | |
71 | ||
72 | /** | |
73 | * amdgpu_vce_sw_init - allocate memory, load vce firmware | |
74 | * | |
75 | * @adev: amdgpu_device pointer | |
76 | * | |
77 | * First step to get VCE online, allocate memory and load the firmware | |
78 | */ | |
e9822622 | 79 | int amdgpu_vce_sw_init(struct amdgpu_device *adev, unsigned long size) |
d38ceaf9 | 80 | { |
c594989c CK |
81 | struct amdgpu_ring *ring; |
82 | struct amd_sched_rq *rq; | |
d38ceaf9 AD |
83 | const char *fw_name; |
84 | const struct common_firmware_header *hdr; | |
85 | unsigned ucode_version, version_major, version_minor, binary_id; | |
86 | int i, r; | |
87 | ||
d38ceaf9 AD |
88 | switch (adev->asic_type) { |
89 | #ifdef CONFIG_DRM_AMDGPU_CIK | |
90 | case CHIP_BONAIRE: | |
91 | fw_name = FIRMWARE_BONAIRE; | |
92 | break; | |
93 | case CHIP_KAVERI: | |
94 | fw_name = FIRMWARE_KAVERI; | |
95 | break; | |
96 | case CHIP_KABINI: | |
97 | fw_name = FIRMWARE_KABINI; | |
98 | break; | |
99 | case CHIP_HAWAII: | |
100 | fw_name = FIRMWARE_HAWAII; | |
101 | break; | |
102 | case CHIP_MULLINS: | |
103 | fw_name = FIRMWARE_MULLINS; | |
104 | break; | |
105 | #endif | |
106 | case CHIP_TONGA: | |
107 | fw_name = FIRMWARE_TONGA; | |
108 | break; | |
109 | case CHIP_CARRIZO: | |
110 | fw_name = FIRMWARE_CARRIZO; | |
111 | break; | |
188a9bcd AD |
112 | case CHIP_FIJI: |
113 | fw_name = FIRMWARE_FIJI; | |
114 | break; | |
cfaba566 SL |
115 | case CHIP_STONEY: |
116 | fw_name = FIRMWARE_STONEY; | |
117 | break; | |
2cc0c0b5 FC |
118 | case CHIP_POLARIS10: |
119 | fw_name = FIRMWARE_POLARIS10; | |
1b4eeea5 | 120 | break; |
2cc0c0b5 FC |
121 | case CHIP_POLARIS11: |
122 | fw_name = FIRMWARE_POLARIS11; | |
1b4eeea5 | 123 | break; |
d38ceaf9 AD |
124 | |
125 | default: | |
126 | return -EINVAL; | |
127 | } | |
128 | ||
129 | r = request_firmware(&adev->vce.fw, fw_name, adev->dev); | |
130 | if (r) { | |
131 | dev_err(adev->dev, "amdgpu_vce: Can't load firmware \"%s\"\n", | |
132 | fw_name); | |
133 | return r; | |
134 | } | |
135 | ||
136 | r = amdgpu_ucode_validate(adev->vce.fw); | |
137 | if (r) { | |
138 | dev_err(adev->dev, "amdgpu_vce: Can't validate firmware \"%s\"\n", | |
139 | fw_name); | |
140 | release_firmware(adev->vce.fw); | |
141 | adev->vce.fw = NULL; | |
142 | return r; | |
143 | } | |
144 | ||
145 | hdr = (const struct common_firmware_header *)adev->vce.fw->data; | |
146 | ||
147 | ucode_version = le32_to_cpu(hdr->ucode_version); | |
148 | version_major = (ucode_version >> 20) & 0xfff; | |
149 | version_minor = (ucode_version >> 8) & 0xfff; | |
150 | binary_id = ucode_version & 0xff; | |
151 | DRM_INFO("Found VCE firmware Version: %hhd.%hhd Binary ID: %hhd\n", | |
152 | version_major, version_minor, binary_id); | |
153 | adev->vce.fw_version = ((version_major << 24) | (version_minor << 16) | | |
154 | (binary_id << 8)); | |
155 | ||
156 | /* allocate firmware, stack and heap BO */ | |
157 | ||
d38ceaf9 | 158 | r = amdgpu_bo_create(adev, size, PAGE_SIZE, true, |
857d913d AD |
159 | AMDGPU_GEM_DOMAIN_VRAM, |
160 | AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED, | |
72d7668b | 161 | NULL, NULL, &adev->vce.vcpu_bo); |
d38ceaf9 AD |
162 | if (r) { |
163 | dev_err(adev->dev, "(%d) failed to allocate VCE bo\n", r); | |
164 | return r; | |
165 | } | |
166 | ||
167 | r = amdgpu_bo_reserve(adev->vce.vcpu_bo, false); | |
168 | if (r) { | |
169 | amdgpu_bo_unref(&adev->vce.vcpu_bo); | |
170 | dev_err(adev->dev, "(%d) failed to reserve VCE bo\n", r); | |
171 | return r; | |
172 | } | |
173 | ||
174 | r = amdgpu_bo_pin(adev->vce.vcpu_bo, AMDGPU_GEM_DOMAIN_VRAM, | |
175 | &adev->vce.gpu_addr); | |
176 | amdgpu_bo_unreserve(adev->vce.vcpu_bo); | |
177 | if (r) { | |
178 | amdgpu_bo_unref(&adev->vce.vcpu_bo); | |
179 | dev_err(adev->dev, "(%d) VCE bo pin failed\n", r); | |
180 | return r; | |
181 | } | |
182 | ||
c594989c CK |
183 | |
184 | ring = &adev->vce.ring[0]; | |
185 | rq = &ring->sched.sched_rq[AMD_SCHED_PRIORITY_NORMAL]; | |
186 | r = amd_sched_entity_init(&ring->sched, &adev->vce.entity, | |
187 | rq, amdgpu_sched_jobs); | |
188 | if (r != 0) { | |
189 | DRM_ERROR("Failed setting up VCE run queue.\n"); | |
190 | return r; | |
191 | } | |
192 | ||
d38ceaf9 AD |
193 | for (i = 0; i < AMDGPU_MAX_VCE_HANDLES; ++i) { |
194 | atomic_set(&adev->vce.handles[i], 0); | |
195 | adev->vce.filp[i] = NULL; | |
196 | } | |
197 | ||
ebff485e CK |
198 | INIT_DELAYED_WORK(&adev->vce.idle_work, amdgpu_vce_idle_work_handler); |
199 | mutex_init(&adev->vce.idle_mutex); | |
200 | ||
d38ceaf9 AD |
201 | return 0; |
202 | } | |
203 | ||
204 | /** | |
205 | * amdgpu_vce_sw_fini - free memory | |
206 | * | |
207 | * @adev: amdgpu_device pointer | |
208 | * | |
209 | * Last step on VCE teardown, free firmware memory | |
210 | */ | |
211 | int amdgpu_vce_sw_fini(struct amdgpu_device *adev) | |
212 | { | |
4cd00d37 GI |
213 | unsigned i; |
214 | ||
d38ceaf9 AD |
215 | if (adev->vce.vcpu_bo == NULL) |
216 | return 0; | |
217 | ||
c594989c CK |
218 | amd_sched_entity_fini(&adev->vce.ring[0].sched, &adev->vce.entity); |
219 | ||
d38ceaf9 AD |
220 | amdgpu_bo_unref(&adev->vce.vcpu_bo); |
221 | ||
4cd00d37 GI |
222 | for (i = 0; i < adev->vce.num_rings; i++) |
223 | amdgpu_ring_fini(&adev->vce.ring[i]); | |
d38ceaf9 AD |
224 | |
225 | release_firmware(adev->vce.fw); | |
ebff485e | 226 | mutex_destroy(&adev->vce.idle_mutex); |
d38ceaf9 AD |
227 | |
228 | return 0; | |
229 | } | |
230 | ||
231 | /** | |
232 | * amdgpu_vce_suspend - unpin VCE fw memory | |
233 | * | |
234 | * @adev: amdgpu_device pointer | |
235 | * | |
236 | */ | |
237 | int amdgpu_vce_suspend(struct amdgpu_device *adev) | |
238 | { | |
239 | int i; | |
240 | ||
241 | if (adev->vce.vcpu_bo == NULL) | |
242 | return 0; | |
243 | ||
244 | for (i = 0; i < AMDGPU_MAX_VCE_HANDLES; ++i) | |
245 | if (atomic_read(&adev->vce.handles[i])) | |
246 | break; | |
247 | ||
248 | if (i == AMDGPU_MAX_VCE_HANDLES) | |
249 | return 0; | |
250 | ||
85cc88f0 | 251 | cancel_delayed_work_sync(&adev->vce.idle_work); |
d38ceaf9 AD |
252 | /* TODO: suspending running encoding sessions isn't supported */ |
253 | return -EINVAL; | |
254 | } | |
255 | ||
256 | /** | |
257 | * amdgpu_vce_resume - pin VCE fw memory | |
258 | * | |
259 | * @adev: amdgpu_device pointer | |
260 | * | |
261 | */ | |
262 | int amdgpu_vce_resume(struct amdgpu_device *adev) | |
263 | { | |
264 | void *cpu_addr; | |
265 | const struct common_firmware_header *hdr; | |
266 | unsigned offset; | |
267 | int r; | |
268 | ||
269 | if (adev->vce.vcpu_bo == NULL) | |
270 | return -EINVAL; | |
271 | ||
272 | r = amdgpu_bo_reserve(adev->vce.vcpu_bo, false); | |
273 | if (r) { | |
274 | dev_err(adev->dev, "(%d) failed to reserve VCE bo\n", r); | |
275 | return r; | |
276 | } | |
277 | ||
278 | r = amdgpu_bo_kmap(adev->vce.vcpu_bo, &cpu_addr); | |
279 | if (r) { | |
280 | amdgpu_bo_unreserve(adev->vce.vcpu_bo); | |
281 | dev_err(adev->dev, "(%d) VCE map failed\n", r); | |
282 | return r; | |
283 | } | |
284 | ||
285 | hdr = (const struct common_firmware_header *)adev->vce.fw->data; | |
286 | offset = le32_to_cpu(hdr->ucode_array_offset_bytes); | |
7b4d3e29 CK |
287 | memcpy_toio(cpu_addr, adev->vce.fw->data + offset, |
288 | adev->vce.fw->size - offset); | |
d38ceaf9 AD |
289 | |
290 | amdgpu_bo_kunmap(adev->vce.vcpu_bo); | |
291 | ||
292 | amdgpu_bo_unreserve(adev->vce.vcpu_bo); | |
293 | ||
294 | return 0; | |
295 | } | |
296 | ||
297 | /** | |
298 | * amdgpu_vce_idle_work_handler - power off VCE | |
299 | * | |
300 | * @work: pointer to work structure | |
301 | * | |
302 | * power off VCE when it's not used any more | |
303 | */ | |
304 | static void amdgpu_vce_idle_work_handler(struct work_struct *work) | |
305 | { | |
306 | struct amdgpu_device *adev = | |
307 | container_of(work, struct amdgpu_device, vce.idle_work.work); | |
24c5fe56 | 308 | unsigned i, count = 0; |
d38ceaf9 | 309 | |
24c5fe56 AD |
310 | for (i = 0; i < adev->vce.num_rings; i++) |
311 | count += amdgpu_fence_count_emitted(&adev->vce.ring[i]); | |
312 | ||
313 | if (count == 0) { | |
d38ceaf9 AD |
314 | if (adev->pm.dpm_enabled) { |
315 | amdgpu_dpm_enable_vce(adev, false); | |
316 | } else { | |
317 | amdgpu_asic_set_vce_clocks(adev, 0, 0); | |
318 | } | |
319 | } else { | |
182830a1 | 320 | schedule_delayed_work(&adev->vce.idle_work, VCE_IDLE_TIMEOUT); |
d38ceaf9 AD |
321 | } |
322 | } | |
323 | ||
324 | /** | |
ebff485e | 325 | * amdgpu_vce_ring_begin_use - power up VCE |
d38ceaf9 | 326 | * |
ebff485e | 327 | * @ring: amdgpu ring |
d38ceaf9 AD |
328 | * |
329 | * Make sure VCE is powered up when we want to use it | |
330 | */ | |
ebff485e | 331 | void amdgpu_vce_ring_begin_use(struct amdgpu_ring *ring) |
d38ceaf9 | 332 | { |
ebff485e CK |
333 | struct amdgpu_device *adev = ring->adev; |
334 | bool set_clocks; | |
d38ceaf9 | 335 | |
ebff485e CK |
336 | mutex_lock(&adev->vce.idle_mutex); |
337 | set_clocks = !cancel_delayed_work_sync(&adev->vce.idle_work); | |
182830a1 | 338 | if (set_clocks) { |
d38ceaf9 AD |
339 | if (adev->pm.dpm_enabled) { |
340 | amdgpu_dpm_enable_vce(adev, true); | |
341 | } else { | |
342 | amdgpu_asic_set_vce_clocks(adev, 53300, 40000); | |
343 | } | |
344 | } | |
ebff485e CK |
345 | mutex_unlock(&adev->vce.idle_mutex); |
346 | } | |
347 | ||
348 | /** | |
349 | * amdgpu_vce_ring_end_use - power VCE down | |
350 | * | |
351 | * @ring: amdgpu ring | |
352 | * | |
353 | * Schedule work to power VCE down again | |
354 | */ | |
355 | void amdgpu_vce_ring_end_use(struct amdgpu_ring *ring) | |
356 | { | |
357 | schedule_delayed_work(&ring->adev->vce.idle_work, VCE_IDLE_TIMEOUT); | |
d38ceaf9 AD |
358 | } |
359 | ||
360 | /** | |
361 | * amdgpu_vce_free_handles - free still open VCE handles | |
362 | * | |
363 | * @adev: amdgpu_device pointer | |
364 | * @filp: drm file pointer | |
365 | * | |
366 | * Close all VCE handles still open by this file pointer | |
367 | */ | |
368 | void amdgpu_vce_free_handles(struct amdgpu_device *adev, struct drm_file *filp) | |
369 | { | |
370 | struct amdgpu_ring *ring = &adev->vce.ring[0]; | |
371 | int i, r; | |
372 | for (i = 0; i < AMDGPU_MAX_VCE_HANDLES; ++i) { | |
373 | uint32_t handle = atomic_read(&adev->vce.handles[i]); | |
182830a1 | 374 | |
d38ceaf9 AD |
375 | if (!handle || adev->vce.filp[i] != filp) |
376 | continue; | |
377 | ||
9f2ade33 | 378 | r = amdgpu_vce_get_destroy_msg(ring, handle, false, NULL); |
d38ceaf9 AD |
379 | if (r) |
380 | DRM_ERROR("Error destroying VCE handle (%d)!\n", r); | |
381 | ||
382 | adev->vce.filp[i] = NULL; | |
383 | atomic_set(&adev->vce.handles[i], 0); | |
384 | } | |
385 | } | |
386 | ||
387 | /** | |
388 | * amdgpu_vce_get_create_msg - generate a VCE create msg | |
389 | * | |
390 | * @adev: amdgpu_device pointer | |
391 | * @ring: ring we should submit the msg to | |
392 | * @handle: VCE session handle to use | |
393 | * @fence: optional fence to return | |
394 | * | |
395 | * Open up a stream for HW test | |
396 | */ | |
397 | int amdgpu_vce_get_create_msg(struct amdgpu_ring *ring, uint32_t handle, | |
ed40bfb8 | 398 | struct fence **fence) |
d38ceaf9 AD |
399 | { |
400 | const unsigned ib_size_dw = 1024; | |
d71518b5 CK |
401 | struct amdgpu_job *job; |
402 | struct amdgpu_ib *ib; | |
1763552e | 403 | struct fence *f = NULL; |
d38ceaf9 AD |
404 | uint64_t dummy; |
405 | int i, r; | |
406 | ||
d71518b5 CK |
407 | r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4, &job); |
408 | if (r) | |
d38ceaf9 | 409 | return r; |
d71518b5 CK |
410 | |
411 | ib = &job->ibs[0]; | |
d38ceaf9 | 412 | |
8128765c | 413 | dummy = ib->gpu_addr + 1024; |
d38ceaf9 AD |
414 | |
415 | /* stitch together an VCE create msg */ | |
8128765c CZ |
416 | ib->length_dw = 0; |
417 | ib->ptr[ib->length_dw++] = 0x0000000c; /* len */ | |
418 | ib->ptr[ib->length_dw++] = 0x00000001; /* session cmd */ | |
419 | ib->ptr[ib->length_dw++] = handle; | |
420 | ||
d66f8e48 LL |
421 | if ((ring->adev->vce.fw_version >> 24) >= 52) |
422 | ib->ptr[ib->length_dw++] = 0x00000040; /* len */ | |
423 | else | |
424 | ib->ptr[ib->length_dw++] = 0x00000030; /* len */ | |
8128765c CZ |
425 | ib->ptr[ib->length_dw++] = 0x01000001; /* create cmd */ |
426 | ib->ptr[ib->length_dw++] = 0x00000000; | |
427 | ib->ptr[ib->length_dw++] = 0x00000042; | |
428 | ib->ptr[ib->length_dw++] = 0x0000000a; | |
429 | ib->ptr[ib->length_dw++] = 0x00000001; | |
430 | ib->ptr[ib->length_dw++] = 0x00000080; | |
431 | ib->ptr[ib->length_dw++] = 0x00000060; | |
432 | ib->ptr[ib->length_dw++] = 0x00000100; | |
433 | ib->ptr[ib->length_dw++] = 0x00000100; | |
434 | ib->ptr[ib->length_dw++] = 0x0000000c; | |
435 | ib->ptr[ib->length_dw++] = 0x00000000; | |
d66f8e48 LL |
436 | if ((ring->adev->vce.fw_version >> 24) >= 52) { |
437 | ib->ptr[ib->length_dw++] = 0x00000000; | |
438 | ib->ptr[ib->length_dw++] = 0x00000000; | |
439 | ib->ptr[ib->length_dw++] = 0x00000000; | |
440 | ib->ptr[ib->length_dw++] = 0x00000000; | |
441 | } | |
8128765c CZ |
442 | |
443 | ib->ptr[ib->length_dw++] = 0x00000014; /* len */ | |
444 | ib->ptr[ib->length_dw++] = 0x05000005; /* feedback buffer */ | |
445 | ib->ptr[ib->length_dw++] = upper_32_bits(dummy); | |
446 | ib->ptr[ib->length_dw++] = dummy; | |
447 | ib->ptr[ib->length_dw++] = 0x00000001; | |
448 | ||
449 | for (i = ib->length_dw; i < ib_size_dw; ++i) | |
450 | ib->ptr[i] = 0x0; | |
451 | ||
c5637837 | 452 | r = amdgpu_ib_schedule(ring, 1, ib, NULL, NULL, &f); |
22a77cf6 | 453 | job->fence = fence_get(f); |
8128765c CZ |
454 | if (r) |
455 | goto err; | |
9f2ade33 CK |
456 | |
457 | amdgpu_job_free(job); | |
d38ceaf9 | 458 | if (fence) |
1763552e | 459 | *fence = fence_get(f); |
281b4223 | 460 | fence_put(f); |
cadf97b1 | 461 | return 0; |
d71518b5 | 462 | |
8128765c | 463 | err: |
d71518b5 | 464 | amdgpu_job_free(job); |
d38ceaf9 AD |
465 | return r; |
466 | } | |
467 | ||
468 | /** | |
469 | * amdgpu_vce_get_destroy_msg - generate a VCE destroy msg | |
470 | * | |
471 | * @adev: amdgpu_device pointer | |
472 | * @ring: ring we should submit the msg to | |
473 | * @handle: VCE session handle to use | |
474 | * @fence: optional fence to return | |
475 | * | |
476 | * Close up a stream for HW test or if userspace failed to do so | |
477 | */ | |
478 | int amdgpu_vce_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle, | |
9f2ade33 | 479 | bool direct, struct fence **fence) |
d38ceaf9 AD |
480 | { |
481 | const unsigned ib_size_dw = 1024; | |
d71518b5 CK |
482 | struct amdgpu_job *job; |
483 | struct amdgpu_ib *ib; | |
1763552e | 484 | struct fence *f = NULL; |
d38ceaf9 AD |
485 | int i, r; |
486 | ||
d71518b5 CK |
487 | r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4, &job); |
488 | if (r) | |
d38ceaf9 | 489 | return r; |
d38ceaf9 | 490 | |
d71518b5 | 491 | ib = &job->ibs[0]; |
d38ceaf9 AD |
492 | |
493 | /* stitch together an VCE destroy msg */ | |
8128765c CZ |
494 | ib->length_dw = 0; |
495 | ib->ptr[ib->length_dw++] = 0x0000000c; /* len */ | |
496 | ib->ptr[ib->length_dw++] = 0x00000001; /* session cmd */ | |
497 | ib->ptr[ib->length_dw++] = handle; | |
498 | ||
99453a9e RZ |
499 | ib->ptr[ib->length_dw++] = 0x00000020; /* len */ |
500 | ib->ptr[ib->length_dw++] = 0x00000002; /* task info */ | |
501 | ib->ptr[ib->length_dw++] = 0xffffffff; /* next task info, set to 0xffffffff if no */ | |
502 | ib->ptr[ib->length_dw++] = 0x00000001; /* destroy session */ | |
503 | ib->ptr[ib->length_dw++] = 0x00000000; | |
504 | ib->ptr[ib->length_dw++] = 0x00000000; | |
505 | ib->ptr[ib->length_dw++] = 0xffffffff; /* feedback is not needed, set to 0xffffffff and firmware will not output feedback */ | |
506 | ib->ptr[ib->length_dw++] = 0x00000000; | |
8128765c CZ |
507 | |
508 | ib->ptr[ib->length_dw++] = 0x00000008; /* len */ | |
509 | ib->ptr[ib->length_dw++] = 0x02000001; /* destroy cmd */ | |
510 | ||
511 | for (i = ib->length_dw; i < ib_size_dw; ++i) | |
512 | ib->ptr[i] = 0x0; | |
9f2ade33 CK |
513 | |
514 | if (direct) { | |
c5637837 | 515 | r = amdgpu_ib_schedule(ring, 1, ib, NULL, NULL, &f); |
22a77cf6 | 516 | job->fence = fence_get(f); |
9f2ade33 CK |
517 | if (r) |
518 | goto err; | |
519 | ||
520 | amdgpu_job_free(job); | |
521 | } else { | |
c594989c | 522 | r = amdgpu_job_submit(job, ring, &ring->adev->vce.entity, |
9f2ade33 CK |
523 | AMDGPU_FENCE_OWNER_UNDEFINED, &f); |
524 | if (r) | |
525 | goto err; | |
526 | } | |
527 | ||
d38ceaf9 | 528 | if (fence) |
1763552e | 529 | *fence = fence_get(f); |
281b4223 | 530 | fence_put(f); |
cadf97b1 | 531 | return 0; |
d71518b5 | 532 | |
8128765c | 533 | err: |
d71518b5 | 534 | amdgpu_job_free(job); |
d38ceaf9 AD |
535 | return r; |
536 | } | |
537 | ||
538 | /** | |
539 | * amdgpu_vce_cs_reloc - command submission relocation | |
540 | * | |
541 | * @p: parser context | |
542 | * @lo: address of lower dword | |
543 | * @hi: address of higher dword | |
f1689ec1 | 544 | * @size: minimum size |
d38ceaf9 AD |
545 | * |
546 | * Patch relocation inside command stream with real buffer address | |
547 | */ | |
f1689ec1 | 548 | static int amdgpu_vce_cs_reloc(struct amdgpu_cs_parser *p, uint32_t ib_idx, |
dc78330a | 549 | int lo, int hi, unsigned size, uint32_t index) |
d38ceaf9 AD |
550 | { |
551 | struct amdgpu_bo_va_mapping *mapping; | |
d38ceaf9 AD |
552 | struct amdgpu_bo *bo; |
553 | uint64_t addr; | |
554 | ||
dc78330a CK |
555 | if (index == 0xffffffff) |
556 | index = 0; | |
557 | ||
d38ceaf9 AD |
558 | addr = ((uint64_t)amdgpu_get_ib_value(p, ib_idx, lo)) | |
559 | ((uint64_t)amdgpu_get_ib_value(p, ib_idx, hi)) << 32; | |
dc78330a | 560 | addr += ((uint64_t)size) * ((uint64_t)index); |
d38ceaf9 AD |
561 | |
562 | mapping = amdgpu_cs_find_mapping(p, addr, &bo); | |
563 | if (mapping == NULL) { | |
dc78330a CK |
564 | DRM_ERROR("Can't find BO for addr 0x%010Lx %d %d %d %d\n", |
565 | addr, lo, hi, size, index); | |
d38ceaf9 AD |
566 | return -EINVAL; |
567 | } | |
568 | ||
f1689ec1 CK |
569 | if ((addr + (uint64_t)size) > |
570 | ((uint64_t)mapping->it.last + 1) * AMDGPU_GPU_PAGE_SIZE) { | |
571 | DRM_ERROR("BO to small for addr 0x%010Lx %d %d\n", | |
572 | addr, lo, hi); | |
573 | return -EINVAL; | |
574 | } | |
575 | ||
d38ceaf9 AD |
576 | addr -= ((uint64_t)mapping->it.start) * AMDGPU_GPU_PAGE_SIZE; |
577 | addr += amdgpu_bo_gpu_offset(bo); | |
dc78330a | 578 | addr -= ((uint64_t)size) * ((uint64_t)index); |
d38ceaf9 | 579 | |
7270f839 CK |
580 | amdgpu_set_ib_value(p, ib_idx, lo, lower_32_bits(addr)); |
581 | amdgpu_set_ib_value(p, ib_idx, hi, upper_32_bits(addr)); | |
d38ceaf9 AD |
582 | |
583 | return 0; | |
584 | } | |
585 | ||
f1689ec1 CK |
586 | /** |
587 | * amdgpu_vce_validate_handle - validate stream handle | |
588 | * | |
589 | * @p: parser context | |
590 | * @handle: handle to validate | |
2f4b9368 | 591 | * @allocated: allocated a new handle? |
f1689ec1 CK |
592 | * |
593 | * Validates the handle and return the found session index or -EINVAL | |
594 | * if we don't have another free session index. | |
595 | */ | |
596 | static int amdgpu_vce_validate_handle(struct amdgpu_cs_parser *p, | |
e5223214 | 597 | uint32_t handle, uint32_t *allocated) |
f1689ec1 CK |
598 | { |
599 | unsigned i; | |
600 | ||
601 | /* validate the handle */ | |
602 | for (i = 0; i < AMDGPU_MAX_VCE_HANDLES; ++i) { | |
2f4b9368 CK |
603 | if (atomic_read(&p->adev->vce.handles[i]) == handle) { |
604 | if (p->adev->vce.filp[i] != p->filp) { | |
605 | DRM_ERROR("VCE handle collision detected!\n"); | |
606 | return -EINVAL; | |
607 | } | |
f1689ec1 | 608 | return i; |
2f4b9368 | 609 | } |
f1689ec1 CK |
610 | } |
611 | ||
612 | /* handle not found try to alloc a new one */ | |
613 | for (i = 0; i < AMDGPU_MAX_VCE_HANDLES; ++i) { | |
614 | if (!atomic_cmpxchg(&p->adev->vce.handles[i], 0, handle)) { | |
615 | p->adev->vce.filp[i] = p->filp; | |
616 | p->adev->vce.img_size[i] = 0; | |
e5223214 | 617 | *allocated |= 1 << i; |
f1689ec1 CK |
618 | return i; |
619 | } | |
620 | } | |
621 | ||
622 | DRM_ERROR("No more free VCE handles!\n"); | |
623 | return -EINVAL; | |
624 | } | |
625 | ||
d38ceaf9 AD |
626 | /** |
627 | * amdgpu_vce_cs_parse - parse and validate the command stream | |
628 | * | |
629 | * @p: parser context | |
630 | * | |
631 | */ | |
632 | int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, uint32_t ib_idx) | |
633 | { | |
50838c8c | 634 | struct amdgpu_ib *ib = &p->job->ibs[ib_idx]; |
dc78330a | 635 | unsigned fb_idx = 0, bs_idx = 0; |
f1689ec1 | 636 | int session_idx = -1; |
e5223214 CK |
637 | uint32_t destroyed = 0; |
638 | uint32_t created = 0; | |
639 | uint32_t allocated = 0; | |
f1689ec1 CK |
640 | uint32_t tmp, handle = 0; |
641 | uint32_t *size = &tmp; | |
c855e250 CK |
642 | int i, r, idx = 0; |
643 | ||
644 | r = amdgpu_cs_sysvm_access_required(p); | |
645 | if (r) | |
646 | return r; | |
d38ceaf9 | 647 | |
d38ceaf9 AD |
648 | while (idx < ib->length_dw) { |
649 | uint32_t len = amdgpu_get_ib_value(p, ib_idx, idx); | |
650 | uint32_t cmd = amdgpu_get_ib_value(p, ib_idx, idx + 1); | |
651 | ||
652 | if ((len < 8) || (len & 3)) { | |
653 | DRM_ERROR("invalid VCE command length (%d)!\n", len); | |
2f4b9368 CK |
654 | r = -EINVAL; |
655 | goto out; | |
d38ceaf9 AD |
656 | } |
657 | ||
658 | switch (cmd) { | |
182830a1 | 659 | case 0x00000001: /* session */ |
d38ceaf9 | 660 | handle = amdgpu_get_ib_value(p, ib_idx, idx + 2); |
2f4b9368 CK |
661 | session_idx = amdgpu_vce_validate_handle(p, handle, |
662 | &allocated); | |
e5223214 CK |
663 | if (session_idx < 0) { |
664 | r = session_idx; | |
665 | goto out; | |
666 | } | |
f1689ec1 | 667 | size = &p->adev->vce.img_size[session_idx]; |
d38ceaf9 AD |
668 | break; |
669 | ||
182830a1 | 670 | case 0x00000002: /* task info */ |
dc78330a CK |
671 | fb_idx = amdgpu_get_ib_value(p, ib_idx, idx + 6); |
672 | bs_idx = amdgpu_get_ib_value(p, ib_idx, idx + 7); | |
f1689ec1 CK |
673 | break; |
674 | ||
182830a1 | 675 | case 0x01000001: /* create */ |
e5223214 CK |
676 | created |= 1 << session_idx; |
677 | if (destroyed & (1 << session_idx)) { | |
678 | destroyed &= ~(1 << session_idx); | |
679 | allocated |= 1 << session_idx; | |
680 | ||
681 | } else if (!(allocated & (1 << session_idx))) { | |
2f4b9368 CK |
682 | DRM_ERROR("Handle already in use!\n"); |
683 | r = -EINVAL; | |
684 | goto out; | |
685 | } | |
686 | ||
f1689ec1 CK |
687 | *size = amdgpu_get_ib_value(p, ib_idx, idx + 8) * |
688 | amdgpu_get_ib_value(p, ib_idx, idx + 10) * | |
689 | 8 * 3 / 2; | |
690 | break; | |
691 | ||
182830a1 CK |
692 | case 0x04000001: /* config extension */ |
693 | case 0x04000002: /* pic control */ | |
694 | case 0x04000005: /* rate control */ | |
695 | case 0x04000007: /* motion estimation */ | |
696 | case 0x04000008: /* rdo */ | |
697 | case 0x04000009: /* vui */ | |
698 | case 0x05000002: /* auxiliary buffer */ | |
4f827785 | 699 | case 0x05000009: /* clock table */ |
d38ceaf9 AD |
700 | break; |
701 | ||
5eeda8a4 AD |
702 | case 0x0500000c: /* hw config */ |
703 | switch (p->adev->asic_type) { | |
704 | #ifdef CONFIG_DRM_AMDGPU_CIK | |
705 | case CHIP_KAVERI: | |
706 | case CHIP_MULLINS: | |
707 | #endif | |
708 | case CHIP_CARRIZO: | |
709 | break; | |
710 | default: | |
711 | r = -EINVAL; | |
712 | goto out; | |
713 | } | |
714 | break; | |
715 | ||
182830a1 | 716 | case 0x03000001: /* encode */ |
f1689ec1 | 717 | r = amdgpu_vce_cs_reloc(p, ib_idx, idx + 10, idx + 9, |
dc78330a | 718 | *size, 0); |
d38ceaf9 | 719 | if (r) |
2f4b9368 | 720 | goto out; |
d38ceaf9 | 721 | |
f1689ec1 | 722 | r = amdgpu_vce_cs_reloc(p, ib_idx, idx + 12, idx + 11, |
dc78330a | 723 | *size / 3, 0); |
d38ceaf9 | 724 | if (r) |
2f4b9368 | 725 | goto out; |
d38ceaf9 AD |
726 | break; |
727 | ||
182830a1 | 728 | case 0x02000001: /* destroy */ |
e5223214 | 729 | destroyed |= 1 << session_idx; |
d38ceaf9 AD |
730 | break; |
731 | ||
182830a1 | 732 | case 0x05000001: /* context buffer */ |
f1689ec1 | 733 | r = amdgpu_vce_cs_reloc(p, ib_idx, idx + 3, idx + 2, |
dc78330a | 734 | *size * 2, 0); |
f1689ec1 | 735 | if (r) |
2f4b9368 | 736 | goto out; |
f1689ec1 CK |
737 | break; |
738 | ||
182830a1 | 739 | case 0x05000004: /* video bitstream buffer */ |
f1689ec1 CK |
740 | tmp = amdgpu_get_ib_value(p, ib_idx, idx + 4); |
741 | r = amdgpu_vce_cs_reloc(p, ib_idx, idx + 3, idx + 2, | |
dc78330a | 742 | tmp, bs_idx); |
f1689ec1 | 743 | if (r) |
2f4b9368 | 744 | goto out; |
f1689ec1 CK |
745 | break; |
746 | ||
182830a1 | 747 | case 0x05000005: /* feedback buffer */ |
f1689ec1 | 748 | r = amdgpu_vce_cs_reloc(p, ib_idx, idx + 3, idx + 2, |
dc78330a | 749 | 4096, fb_idx); |
d38ceaf9 | 750 | if (r) |
2f4b9368 | 751 | goto out; |
d38ceaf9 AD |
752 | break; |
753 | ||
754 | default: | |
755 | DRM_ERROR("invalid VCE command (0x%x)!\n", cmd); | |
2f4b9368 CK |
756 | r = -EINVAL; |
757 | goto out; | |
d38ceaf9 AD |
758 | } |
759 | ||
f1689ec1 CK |
760 | if (session_idx == -1) { |
761 | DRM_ERROR("no session command at start of IB\n"); | |
2f4b9368 CK |
762 | r = -EINVAL; |
763 | goto out; | |
f1689ec1 CK |
764 | } |
765 | ||
d38ceaf9 AD |
766 | idx += len / 4; |
767 | } | |
768 | ||
e5223214 | 769 | if (allocated & ~created) { |
2f4b9368 CK |
770 | DRM_ERROR("New session without create command!\n"); |
771 | r = -ENOENT; | |
772 | } | |
773 | ||
774 | out: | |
e5223214 CK |
775 | if (!r) { |
776 | /* No error, free all destroyed handle slots */ | |
777 | tmp = destroyed; | |
778 | } else { | |
779 | /* Error during parsing, free all allocated handle slots */ | |
780 | tmp = allocated; | |
d38ceaf9 AD |
781 | } |
782 | ||
e5223214 CK |
783 | for (i = 0; i < AMDGPU_MAX_VCE_HANDLES; ++i) |
784 | if (tmp & (1 << i)) | |
785 | atomic_set(&p->adev->vce.handles[i], 0); | |
786 | ||
2f4b9368 | 787 | return r; |
d38ceaf9 AD |
788 | } |
789 | ||
d38ceaf9 AD |
790 | /** |
791 | * amdgpu_vce_ring_emit_ib - execute indirect buffer | |
792 | * | |
793 | * @ring: engine to use | |
794 | * @ib: the IB to execute | |
795 | * | |
796 | */ | |
d88bf583 CK |
797 | void amdgpu_vce_ring_emit_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib, |
798 | unsigned vm_id, bool ctx_switch) | |
d38ceaf9 AD |
799 | { |
800 | amdgpu_ring_write(ring, VCE_CMD_IB); | |
801 | amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr)); | |
802 | amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr)); | |
803 | amdgpu_ring_write(ring, ib->length_dw); | |
804 | } | |
805 | ||
806 | /** | |
807 | * amdgpu_vce_ring_emit_fence - add a fence command to the ring | |
808 | * | |
809 | * @ring: engine to use | |
810 | * @fence: the fence | |
811 | * | |
812 | */ | |
813 | void amdgpu_vce_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq, | |
890ee23f | 814 | unsigned flags) |
d38ceaf9 | 815 | { |
890ee23f | 816 | WARN_ON(flags & AMDGPU_FENCE_FLAG_64BIT); |
d38ceaf9 AD |
817 | |
818 | amdgpu_ring_write(ring, VCE_CMD_FENCE); | |
819 | amdgpu_ring_write(ring, addr); | |
820 | amdgpu_ring_write(ring, upper_32_bits(addr)); | |
821 | amdgpu_ring_write(ring, seq); | |
822 | amdgpu_ring_write(ring, VCE_CMD_TRAP); | |
823 | amdgpu_ring_write(ring, VCE_CMD_END); | |
824 | } | |
825 | ||
a6f8d728 AD |
826 | unsigned amdgpu_vce_ring_get_emit_ib_size(struct amdgpu_ring *ring) |
827 | { | |
828 | return | |
829 | 4; /* amdgpu_vce_ring_emit_ib */ | |
830 | } | |
831 | ||
832 | unsigned amdgpu_vce_ring_get_dma_frame_size(struct amdgpu_ring *ring) | |
833 | { | |
834 | return | |
835 | 6; /* amdgpu_vce_ring_emit_fence x1 no user fence */ | |
836 | } | |
837 | ||
d38ceaf9 AD |
838 | /** |
839 | * amdgpu_vce_ring_test_ring - test if VCE ring is working | |
840 | * | |
841 | * @ring: the engine to test on | |
842 | * | |
843 | */ | |
844 | int amdgpu_vce_ring_test_ring(struct amdgpu_ring *ring) | |
845 | { | |
846 | struct amdgpu_device *adev = ring->adev; | |
847 | uint32_t rptr = amdgpu_ring_get_rptr(ring); | |
848 | unsigned i; | |
849 | int r; | |
850 | ||
a27de35c | 851 | r = amdgpu_ring_alloc(ring, 16); |
d38ceaf9 AD |
852 | if (r) { |
853 | DRM_ERROR("amdgpu: vce failed to lock ring %d (%d).\n", | |
854 | ring->idx, r); | |
855 | return r; | |
856 | } | |
857 | amdgpu_ring_write(ring, VCE_CMD_END); | |
a27de35c | 858 | amdgpu_ring_commit(ring); |
d38ceaf9 AD |
859 | |
860 | for (i = 0; i < adev->usec_timeout; i++) { | |
861 | if (amdgpu_ring_get_rptr(ring) != rptr) | |
862 | break; | |
863 | DRM_UDELAY(1); | |
864 | } | |
865 | ||
866 | if (i < adev->usec_timeout) { | |
867 | DRM_INFO("ring test on %d succeeded in %d usecs\n", | |
868 | ring->idx, i); | |
869 | } else { | |
870 | DRM_ERROR("amdgpu: ring %d test failed\n", | |
871 | ring->idx); | |
872 | r = -ETIMEDOUT; | |
873 | } | |
874 | ||
875 | return r; | |
876 | } | |
877 | ||
878 | /** | |
879 | * amdgpu_vce_ring_test_ib - test if VCE IBs are working | |
880 | * | |
881 | * @ring: the engine to test on | |
882 | * | |
883 | */ | |
bbec97aa | 884 | int amdgpu_vce_ring_test_ib(struct amdgpu_ring *ring, long timeout) |
d38ceaf9 | 885 | { |
ed40bfb8 | 886 | struct fence *fence = NULL; |
bbec97aa | 887 | long r; |
d38ceaf9 | 888 | |
6f0359ff AD |
889 | /* skip vce ring1/2 ib test for now, since it's not reliable */ |
890 | if (ring != &ring->adev->vce.ring[0]) | |
898e50d4 LL |
891 | return 0; |
892 | ||
d38ceaf9 AD |
893 | r = amdgpu_vce_get_create_msg(ring, 1, NULL); |
894 | if (r) { | |
bbec97aa | 895 | DRM_ERROR("amdgpu: failed to get create msg (%ld).\n", r); |
d38ceaf9 AD |
896 | goto error; |
897 | } | |
898 | ||
9f2ade33 | 899 | r = amdgpu_vce_get_destroy_msg(ring, 1, true, &fence); |
d38ceaf9 | 900 | if (r) { |
bbec97aa | 901 | DRM_ERROR("amdgpu: failed to get destroy ib (%ld).\n", r); |
d38ceaf9 AD |
902 | goto error; |
903 | } | |
904 | ||
bbec97aa CK |
905 | r = fence_wait_timeout(fence, false, timeout); |
906 | if (r == 0) { | |
907 | DRM_ERROR("amdgpu: IB test timed out.\n"); | |
908 | r = -ETIMEDOUT; | |
909 | } else if (r < 0) { | |
910 | DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r); | |
d38ceaf9 AD |
911 | } else { |
912 | DRM_INFO("ib test on ring %d succeeded\n", ring->idx); | |
bbec97aa | 913 | r = 0; |
d38ceaf9 AD |
914 | } |
915 | error: | |
ed40bfb8 | 916 | fence_put(fence); |
d38ceaf9 AD |
917 | return r; |
918 | } |