Commit | Line | Data |
---|---|---|
d38ceaf9 AD |
1 | /* |
2 | * Copyright 2013 Advanced Micro Devices, Inc. | |
3 | * All Rights Reserved. | |
4 | * | |
5 | * Permission is hereby granted, free of charge, to any person obtaining a | |
6 | * copy of this software and associated documentation files (the | |
7 | * "Software"), to deal in the Software without restriction, including | |
8 | * without limitation the rights to use, copy, modify, merge, publish, | |
9 | * distribute, sub license, and/or sell copies of the Software, and to | |
10 | * permit persons to whom the Software is furnished to do so, subject to | |
11 | * the following conditions: | |
12 | * | |
13 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |
14 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |
15 | * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL | |
16 | * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, | |
17 | * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR | |
18 | * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE | |
19 | * USE OR OTHER DEALINGS IN THE SOFTWARE. | |
20 | * | |
21 | * The above copyright notice and this permission notice (including the | |
22 | * next paragraph) shall be included in all copies or substantial portions | |
23 | * of the Software. | |
24 | * | |
25 | * Authors: Christian König <christian.koenig@amd.com> | |
26 | */ | |
27 | ||
28 | #include <linux/firmware.h> | |
29 | #include <linux/module.h> | |
fdf2f6c5 | 30 | |
d38ceaf9 | 31 | #include <drm/drm.h> |
f89f8c6b | 32 | #include <drm/drm_drv.h> |
d38ceaf9 AD |
33 | |
34 | #include "amdgpu.h" | |
35 | #include "amdgpu_pm.h" | |
36 | #include "amdgpu_vce.h" | |
a190f8dc | 37 | #include "amdgpu_cs.h" |
d38ceaf9 AD |
38 | #include "cikd.h" |
39 | ||
40 | /* 1 second timeout */ | |
182830a1 | 41 | #define VCE_IDLE_TIMEOUT msecs_to_jiffies(1000) |
d38ceaf9 AD |
42 | |
43 | /* Firmware Names */ | |
44 | #ifdef CONFIG_DRM_AMDGPU_CIK | |
ce206464 AD |
45 | #define FIRMWARE_BONAIRE "amdgpu/bonaire_vce.bin" |
46 | #define FIRMWARE_KABINI "amdgpu/kabini_vce.bin" | |
47 | #define FIRMWARE_KAVERI "amdgpu/kaveri_vce.bin" | |
48 | #define FIRMWARE_HAWAII "amdgpu/hawaii_vce.bin" | |
49 | #define FIRMWARE_MULLINS "amdgpu/mullins_vce.bin" | |
d38ceaf9 | 50 | #endif |
c65444fe JZ |
51 | #define FIRMWARE_TONGA "amdgpu/tonga_vce.bin" |
52 | #define FIRMWARE_CARRIZO "amdgpu/carrizo_vce.bin" | |
188a9bcd | 53 | #define FIRMWARE_FIJI "amdgpu/fiji_vce.bin" |
cfaba566 | 54 | #define FIRMWARE_STONEY "amdgpu/stoney_vce.bin" |
2cc0c0b5 | 55 | #define FIRMWARE_POLARIS10 "amdgpu/polaris10_vce.bin" |
f11ded5e LL |
56 | #define FIRMWARE_POLARIS11 "amdgpu/polaris11_vce.bin" |
57 | #define FIRMWARE_POLARIS12 "amdgpu/polaris12_vce.bin" | |
58 | #define FIRMWARE_VEGAM "amdgpu/vegam_vce.bin" | |
d38ceaf9 | 59 | |
c1dc356a | 60 | #define FIRMWARE_VEGA10 "amdgpu/vega10_vce.bin" |
9aa52bc4 | 61 | #define FIRMWARE_VEGA12 "amdgpu/vega12_vce.bin" |
341b4ce2 | 62 | #define FIRMWARE_VEGA20 "amdgpu/vega20_vce.bin" |
c1dc356a | 63 | |
d38ceaf9 AD |
64 | #ifdef CONFIG_DRM_AMDGPU_CIK |
65 | MODULE_FIRMWARE(FIRMWARE_BONAIRE); | |
66 | MODULE_FIRMWARE(FIRMWARE_KABINI); | |
67 | MODULE_FIRMWARE(FIRMWARE_KAVERI); | |
68 | MODULE_FIRMWARE(FIRMWARE_HAWAII); | |
69 | MODULE_FIRMWARE(FIRMWARE_MULLINS); | |
70 | #endif | |
71 | MODULE_FIRMWARE(FIRMWARE_TONGA); | |
72 | MODULE_FIRMWARE(FIRMWARE_CARRIZO); | |
188a9bcd | 73 | MODULE_FIRMWARE(FIRMWARE_FIJI); |
cfaba566 | 74 | MODULE_FIRMWARE(FIRMWARE_STONEY); |
2cc0c0b5 FC |
75 | MODULE_FIRMWARE(FIRMWARE_POLARIS10); |
76 | MODULE_FIRMWARE(FIRMWARE_POLARIS11); | |
c4642a47 | 77 | MODULE_FIRMWARE(FIRMWARE_POLARIS12); |
f11ded5e | 78 | MODULE_FIRMWARE(FIRMWARE_VEGAM); |
d38ceaf9 | 79 | |
c1dc356a | 80 | MODULE_FIRMWARE(FIRMWARE_VEGA10); |
9aa52bc4 | 81 | MODULE_FIRMWARE(FIRMWARE_VEGA12); |
341b4ce2 | 82 | MODULE_FIRMWARE(FIRMWARE_VEGA20); |
c1dc356a | 83 | |
d38ceaf9 | 84 | static void amdgpu_vce_idle_work_handler(struct work_struct *work); |
17523bd0 | 85 | static int amdgpu_vce_get_create_msg(struct amdgpu_ring *ring, uint32_t handle, |
17523bd0 AD |
86 | struct dma_fence **fence); |
87 | static int amdgpu_vce_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle, | |
88 | bool direct, struct dma_fence **fence); | |
d38ceaf9 AD |
89 | |
90 | /** | |
1c7f15c7 | 91 | * amdgpu_vce_sw_init - allocate memory, load vce firmware |
d38ceaf9 AD |
92 | * |
93 | * @adev: amdgpu_device pointer | |
184b762d | 94 | * @size: size for the new BO |
d38ceaf9 AD |
95 | * |
96 | * First step to get VCE online, allocate memory and load the firmware | |
97 | */ | |
e9822622 | 98 | int amdgpu_vce_sw_init(struct amdgpu_device *adev, unsigned long size) |
d38ceaf9 | 99 | { |
d38ceaf9 AD |
100 | const char *fw_name; |
101 | const struct common_firmware_header *hdr; | |
f10984a3 | 102 | unsigned int ucode_version, version_major, version_minor, binary_id; |
d38ceaf9 AD |
103 | int i, r; |
104 | ||
d38ceaf9 AD |
105 | switch (adev->asic_type) { |
106 | #ifdef CONFIG_DRM_AMDGPU_CIK | |
107 | case CHIP_BONAIRE: | |
108 | fw_name = FIRMWARE_BONAIRE; | |
109 | break; | |
110 | case CHIP_KAVERI: | |
111 | fw_name = FIRMWARE_KAVERI; | |
112 | break; | |
113 | case CHIP_KABINI: | |
114 | fw_name = FIRMWARE_KABINI; | |
115 | break; | |
116 | case CHIP_HAWAII: | |
117 | fw_name = FIRMWARE_HAWAII; | |
118 | break; | |
119 | case CHIP_MULLINS: | |
120 | fw_name = FIRMWARE_MULLINS; | |
121 | break; | |
122 | #endif | |
123 | case CHIP_TONGA: | |
124 | fw_name = FIRMWARE_TONGA; | |
125 | break; | |
126 | case CHIP_CARRIZO: | |
127 | fw_name = FIRMWARE_CARRIZO; | |
128 | break; | |
188a9bcd AD |
129 | case CHIP_FIJI: |
130 | fw_name = FIRMWARE_FIJI; | |
131 | break; | |
cfaba566 SL |
132 | case CHIP_STONEY: |
133 | fw_name = FIRMWARE_STONEY; | |
134 | break; | |
2cc0c0b5 FC |
135 | case CHIP_POLARIS10: |
136 | fw_name = FIRMWARE_POLARIS10; | |
1b4eeea5 | 137 | break; |
2cc0c0b5 FC |
138 | case CHIP_POLARIS11: |
139 | fw_name = FIRMWARE_POLARIS11; | |
1b4eeea5 | 140 | break; |
9aa52bc4 AD |
141 | case CHIP_POLARIS12: |
142 | fw_name = FIRMWARE_POLARIS12; | |
143 | break; | |
f11ded5e LL |
144 | case CHIP_VEGAM: |
145 | fw_name = FIRMWARE_VEGAM; | |
146 | break; | |
c1dc356a LL |
147 | case CHIP_VEGA10: |
148 | fw_name = FIRMWARE_VEGA10; | |
149 | break; | |
9aa52bc4 AD |
150 | case CHIP_VEGA12: |
151 | fw_name = FIRMWARE_VEGA12; | |
c4642a47 | 152 | break; |
341b4ce2 FX |
153 | case CHIP_VEGA20: |
154 | fw_name = FIRMWARE_VEGA20; | |
155 | break; | |
d38ceaf9 AD |
156 | |
157 | default: | |
158 | return -EINVAL; | |
159 | } | |
160 | ||
ea5d4934 | 161 | r = amdgpu_ucode_request(adev, &adev->vce.fw, AMDGPU_UCODE_REQUIRED, "%s", fw_name); |
d38ceaf9 AD |
162 | if (r) { |
163 | dev_err(adev->dev, "amdgpu_vce: Can't validate firmware \"%s\"\n", | |
164 | fw_name); | |
52215e2a | 165 | amdgpu_ucode_release(&adev->vce.fw); |
d38ceaf9 AD |
166 | return r; |
167 | } | |
168 | ||
169 | hdr = (const struct common_firmware_header *)adev->vce.fw->data; | |
170 | ||
171 | ucode_version = le32_to_cpu(hdr->ucode_version); | |
172 | version_major = (ucode_version >> 20) & 0xfff; | |
173 | version_minor = (ucode_version >> 8) & 0xfff; | |
174 | binary_id = ucode_version & 0xff; | |
0b437e64 | 175 | DRM_INFO("Found VCE firmware Version: %d.%d Binary ID: %d\n", |
d38ceaf9 AD |
176 | version_major, version_minor, binary_id); |
177 | adev->vce.fw_version = ((version_major << 24) | (version_minor << 16) | | |
178 | (binary_id << 8)); | |
179 | ||
78b3c839 | 180 | r = amdgpu_bo_create_kernel(adev, size, PAGE_SIZE, |
58ab2c08 CK |
181 | AMDGPU_GEM_DOMAIN_VRAM | |
182 | AMDGPU_GEM_DOMAIN_GTT, | |
183 | &adev->vce.vcpu_bo, | |
78b3c839 | 184 | &adev->vce.gpu_addr, &adev->vce.cpu_addr); |
d38ceaf9 AD |
185 | if (r) { |
186 | dev_err(adev->dev, "(%d) failed to allocate VCE bo\n", r); | |
187 | return r; | |
188 | } | |
189 | ||
d38ceaf9 AD |
190 | for (i = 0; i < AMDGPU_MAX_VCE_HANDLES; ++i) { |
191 | atomic_set(&adev->vce.handles[i], 0); | |
192 | adev->vce.filp[i] = NULL; | |
193 | } | |
194 | ||
ebff485e CK |
195 | INIT_DELAYED_WORK(&adev->vce.idle_work, amdgpu_vce_idle_work_handler); |
196 | mutex_init(&adev->vce.idle_mutex); | |
197 | ||
d38ceaf9 AD |
198 | return 0; |
199 | } | |
200 | ||
201 | /** | |
1c7f15c7 | 202 | * amdgpu_vce_sw_fini - free memory |
d38ceaf9 AD |
203 | * |
204 | * @adev: amdgpu_device pointer | |
205 | * | |
206 | * Last step on VCE teardown, free firmware memory | |
207 | */ | |
208 | int amdgpu_vce_sw_fini(struct amdgpu_device *adev) | |
209 | { | |
f10984a3 | 210 | unsigned int i; |
4cd00d37 | 211 | |
d38ceaf9 AD |
212 | if (adev->vce.vcpu_bo == NULL) |
213 | return 0; | |
214 | ||
cdc50176 | 215 | drm_sched_entity_destroy(&adev->vce.entity); |
c594989c | 216 | |
4cd00d37 GI |
217 | for (i = 0; i < adev->vce.num_rings; i++) |
218 | amdgpu_ring_fini(&adev->vce.ring[i]); | |
d38ceaf9 | 219 | |
52215e2a | 220 | amdgpu_ucode_release(&adev->vce.fw); |
ebff485e | 221 | mutex_destroy(&adev->vce.idle_mutex); |
d38ceaf9 | 222 | |
b61badd2 VP |
223 | amdgpu_bo_free_kernel(&adev->vce.vcpu_bo, &adev->vce.gpu_addr, |
224 | (void **)&adev->vce.cpu_addr); | |
225 | ||
d38ceaf9 AD |
226 | return 0; |
227 | } | |
228 | ||
20acbed4 ED |
229 | /** |
230 | * amdgpu_vce_entity_init - init entity | |
231 | * | |
232 | * @adev: amdgpu_device pointer | |
8a0173cd | 233 | * @ring: amdgpu_ring pointer to check |
20acbed4 | 234 | * |
037b98a2 | 235 | * Initialize the entity used for handle management in the kernel driver. |
20acbed4 | 236 | */ |
037b98a2 | 237 | int amdgpu_vce_entity_init(struct amdgpu_device *adev, struct amdgpu_ring *ring) |
20acbed4 | 238 | { |
037b98a2 AD |
239 | if (ring == &adev->vce.ring[0]) { |
240 | struct drm_gpu_scheduler *sched = &ring->sched; | |
241 | int r; | |
242 | ||
243 | r = drm_sched_entity_init(&adev->vce.entity, DRM_SCHED_PRIORITY_NORMAL, | |
244 | &sched, 1, NULL); | |
245 | if (r != 0) { | |
246 | DRM_ERROR("Failed setting up VCE run queue.\n"); | |
247 | return r; | |
248 | } | |
20acbed4 ED |
249 | } |
250 | ||
251 | return 0; | |
252 | } | |
253 | ||
d38ceaf9 AD |
254 | /** |
255 | * amdgpu_vce_suspend - unpin VCE fw memory | |
256 | * | |
257 | * @adev: amdgpu_device pointer | |
258 | * | |
259 | */ | |
260 | int amdgpu_vce_suspend(struct amdgpu_device *adev) | |
261 | { | |
262 | int i; | |
263 | ||
61ea6f58 RZ |
264 | cancel_delayed_work_sync(&adev->vce.idle_work); |
265 | ||
d38ceaf9 AD |
266 | if (adev->vce.vcpu_bo == NULL) |
267 | return 0; | |
268 | ||
269 | for (i = 0; i < AMDGPU_MAX_VCE_HANDLES; ++i) | |
270 | if (atomic_read(&adev->vce.handles[i])) | |
271 | break; | |
272 | ||
273 | if (i == AMDGPU_MAX_VCE_HANDLES) | |
274 | return 0; | |
275 | ||
276 | /* TODO: suspending running encoding sessions isn't supported */ | |
277 | return -EINVAL; | |
278 | } | |
279 | ||
280 | /** | |
281 | * amdgpu_vce_resume - pin VCE fw memory | |
282 | * | |
283 | * @adev: amdgpu_device pointer | |
284 | * | |
285 | */ | |
286 | int amdgpu_vce_resume(struct amdgpu_device *adev) | |
287 | { | |
288 | void *cpu_addr; | |
289 | const struct common_firmware_header *hdr; | |
f10984a3 | 290 | unsigned int offset; |
f89f8c6b | 291 | int r, idx; |
d38ceaf9 AD |
292 | |
293 | if (adev->vce.vcpu_bo == NULL) | |
294 | return -EINVAL; | |
295 | ||
296 | r = amdgpu_bo_reserve(adev->vce.vcpu_bo, false); | |
297 | if (r) { | |
298 | dev_err(adev->dev, "(%d) failed to reserve VCE bo\n", r); | |
299 | return r; | |
300 | } | |
301 | ||
302 | r = amdgpu_bo_kmap(adev->vce.vcpu_bo, &cpu_addr); | |
303 | if (r) { | |
304 | amdgpu_bo_unreserve(adev->vce.vcpu_bo); | |
305 | dev_err(adev->dev, "(%d) VCE map failed\n", r); | |
306 | return r; | |
307 | } | |
308 | ||
309 | hdr = (const struct common_firmware_header *)adev->vce.fw->data; | |
310 | offset = le32_to_cpu(hdr->ucode_array_offset_bytes); | |
f89f8c6b | 311 | |
c58a863b | 312 | if (drm_dev_enter(adev_to_drm(adev), &idx)) { |
f89f8c6b AG |
313 | memcpy_toio(cpu_addr, adev->vce.fw->data + offset, |
314 | adev->vce.fw->size - offset); | |
315 | drm_dev_exit(idx); | |
316 | } | |
d38ceaf9 AD |
317 | |
318 | amdgpu_bo_kunmap(adev->vce.vcpu_bo); | |
319 | ||
320 | amdgpu_bo_unreserve(adev->vce.vcpu_bo); | |
321 | ||
322 | return 0; | |
323 | } | |
324 | ||
325 | /** | |
326 | * amdgpu_vce_idle_work_handler - power off VCE | |
327 | * | |
328 | * @work: pointer to work structure | |
329 | * | |
330 | * power of VCE when it's not used any more | |
331 | */ | |
332 | static void amdgpu_vce_idle_work_handler(struct work_struct *work) | |
333 | { | |
334 | struct amdgpu_device *adev = | |
335 | container_of(work, struct amdgpu_device, vce.idle_work.work); | |
f10984a3 | 336 | unsigned int i, count = 0; |
d38ceaf9 | 337 | |
24c5fe56 AD |
338 | for (i = 0; i < adev->vce.num_rings; i++) |
339 | count += amdgpu_fence_count_emitted(&adev->vce.ring[i]); | |
340 | ||
341 | if (count == 0) { | |
d38ceaf9 AD |
342 | if (adev->pm.dpm_enabled) { |
343 | amdgpu_dpm_enable_vce(adev, false); | |
344 | } else { | |
345 | amdgpu_asic_set_vce_clocks(adev, 0, 0); | |
2990a1fc AD |
346 | amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCE, |
347 | AMD_PG_STATE_GATE); | |
348 | amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_VCE, | |
349 | AMD_CG_STATE_GATE); | |
d38ceaf9 AD |
350 | } |
351 | } else { | |
182830a1 | 352 | schedule_delayed_work(&adev->vce.idle_work, VCE_IDLE_TIMEOUT); |
d38ceaf9 AD |
353 | } |
354 | } | |
355 | ||
356 | /** | |
ebff485e | 357 | * amdgpu_vce_ring_begin_use - power up VCE |
d38ceaf9 | 358 | * |
ebff485e | 359 | * @ring: amdgpu ring |
d38ceaf9 AD |
360 | * |
361 | * Make sure VCE is powerd up when we want to use it | |
362 | */ | |
ebff485e | 363 | void amdgpu_vce_ring_begin_use(struct amdgpu_ring *ring) |
d38ceaf9 | 364 | { |
ebff485e CK |
365 | struct amdgpu_device *adev = ring->adev; |
366 | bool set_clocks; | |
d38ceaf9 | 367 | |
d9af2259 XY |
368 | if (amdgpu_sriov_vf(adev)) |
369 | return; | |
370 | ||
ebff485e CK |
371 | mutex_lock(&adev->vce.idle_mutex); |
372 | set_clocks = !cancel_delayed_work_sync(&adev->vce.idle_work); | |
182830a1 | 373 | if (set_clocks) { |
d38ceaf9 AD |
374 | if (adev->pm.dpm_enabled) { |
375 | amdgpu_dpm_enable_vce(adev, true); | |
376 | } else { | |
377 | amdgpu_asic_set_vce_clocks(adev, 53300, 40000); | |
2990a1fc AD |
378 | amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_VCE, |
379 | AMD_CG_STATE_UNGATE); | |
380 | amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCE, | |
381 | AMD_PG_STATE_UNGATE); | |
28ed5504 | 382 | |
d38ceaf9 AD |
383 | } |
384 | } | |
ebff485e CK |
385 | mutex_unlock(&adev->vce.idle_mutex); |
386 | } | |
387 | ||
388 | /** | |
389 | * amdgpu_vce_ring_end_use - power VCE down | |
390 | * | |
391 | * @ring: amdgpu ring | |
392 | * | |
393 | * Schedule work to power VCE down again | |
394 | */ | |
395 | void amdgpu_vce_ring_end_use(struct amdgpu_ring *ring) | |
396 | { | |
14a8032a ML |
397 | if (!amdgpu_sriov_vf(ring->adev)) |
398 | schedule_delayed_work(&ring->adev->vce.idle_work, VCE_IDLE_TIMEOUT); | |
d38ceaf9 AD |
399 | } |
400 | ||
401 | /** | |
402 | * amdgpu_vce_free_handles - free still open VCE handles | |
403 | * | |
404 | * @adev: amdgpu_device pointer | |
405 | * @filp: drm file pointer | |
406 | * | |
407 | * Close all VCE handles still open by this file pointer | |
408 | */ | |
409 | void amdgpu_vce_free_handles(struct amdgpu_device *adev, struct drm_file *filp) | |
410 | { | |
411 | struct amdgpu_ring *ring = &adev->vce.ring[0]; | |
412 | int i, r; | |
f10984a3 | 413 | |
d38ceaf9 AD |
414 | for (i = 0; i < AMDGPU_MAX_VCE_HANDLES; ++i) { |
415 | uint32_t handle = atomic_read(&adev->vce.handles[i]); | |
182830a1 | 416 | |
d38ceaf9 AD |
417 | if (!handle || adev->vce.filp[i] != filp) |
418 | continue; | |
419 | ||
9f2ade33 | 420 | r = amdgpu_vce_get_destroy_msg(ring, handle, false, NULL); |
d38ceaf9 AD |
421 | if (r) |
422 | DRM_ERROR("Error destroying VCE handle (%d)!\n", r); | |
423 | ||
424 | adev->vce.filp[i] = NULL; | |
425 | atomic_set(&adev->vce.handles[i], 0); | |
426 | } | |
427 | } | |
428 | ||
429 | /** | |
430 | * amdgpu_vce_get_create_msg - generate a VCE create msg | |
431 | * | |
d38ceaf9 AD |
432 | * @ring: ring we should submit the msg to |
433 | * @handle: VCE session handle to use | |
434 | * @fence: optional fence to return | |
435 | * | |
436 | * Open up a stream for HW test | |
437 | */ | |
17523bd0 | 438 | static int amdgpu_vce_get_create_msg(struct amdgpu_ring *ring, uint32_t handle, |
17523bd0 | 439 | struct dma_fence **fence) |
d38ceaf9 | 440 | { |
f10984a3 | 441 | const unsigned int ib_size_dw = 1024; |
d71518b5 CK |
442 | struct amdgpu_job *job; |
443 | struct amdgpu_ib *ib; | |
cb9038aa | 444 | struct amdgpu_ib ib_msg; |
f54d1867 | 445 | struct dma_fence *f = NULL; |
569557e5 | 446 | uint64_t addr; |
d38ceaf9 AD |
447 | int i, r; |
448 | ||
f7d66fb2 CK |
449 | r = amdgpu_job_alloc_with_ib(ring->adev, &ring->adev->vce.entity, |
450 | AMDGPU_FENCE_OWNER_UNDEFINED, | |
451 | ib_size_dw * 4, AMDGPU_IB_POOL_DIRECT, | |
452 | &job); | |
d71518b5 | 453 | if (r) |
d38ceaf9 | 454 | return r; |
d71518b5 | 455 | |
cb9038aa | 456 | memset(&ib_msg, 0, sizeof(ib_msg)); |
457 | /* only one gpu page is needed, alloc +1 page to make addr aligned. */ | |
458 | r = amdgpu_ib_get(ring->adev, NULL, AMDGPU_GPU_PAGE_SIZE * 2, | |
459 | AMDGPU_IB_POOL_DIRECT, | |
460 | &ib_msg); | |
461 | if (r) | |
462 | goto err; | |
d38ceaf9 | 463 | |
cb9038aa | 464 | ib = &job->ibs[0]; |
465 | /* let addr point to page boundary */ | |
466 | addr = AMDGPU_GPU_PAGE_ALIGN(ib_msg.gpu_addr); | |
d38ceaf9 AD |
467 | |
468 | /* stitch together an VCE create msg */ | |
8128765c CZ |
469 | ib->length_dw = 0; |
470 | ib->ptr[ib->length_dw++] = 0x0000000c; /* len */ | |
471 | ib->ptr[ib->length_dw++] = 0x00000001; /* session cmd */ | |
472 | ib->ptr[ib->length_dw++] = handle; | |
473 | ||
d66f8e48 LL |
474 | if ((ring->adev->vce.fw_version >> 24) >= 52) |
475 | ib->ptr[ib->length_dw++] = 0x00000040; /* len */ | |
476 | else | |
477 | ib->ptr[ib->length_dw++] = 0x00000030; /* len */ | |
8128765c CZ |
478 | ib->ptr[ib->length_dw++] = 0x01000001; /* create cmd */ |
479 | ib->ptr[ib->length_dw++] = 0x00000000; | |
480 | ib->ptr[ib->length_dw++] = 0x00000042; | |
481 | ib->ptr[ib->length_dw++] = 0x0000000a; | |
482 | ib->ptr[ib->length_dw++] = 0x00000001; | |
483 | ib->ptr[ib->length_dw++] = 0x00000080; | |
484 | ib->ptr[ib->length_dw++] = 0x00000060; | |
485 | ib->ptr[ib->length_dw++] = 0x00000100; | |
486 | ib->ptr[ib->length_dw++] = 0x00000100; | |
487 | ib->ptr[ib->length_dw++] = 0x0000000c; | |
488 | ib->ptr[ib->length_dw++] = 0x00000000; | |
d66f8e48 LL |
489 | if ((ring->adev->vce.fw_version >> 24) >= 52) { |
490 | ib->ptr[ib->length_dw++] = 0x00000000; | |
491 | ib->ptr[ib->length_dw++] = 0x00000000; | |
492 | ib->ptr[ib->length_dw++] = 0x00000000; | |
493 | ib->ptr[ib->length_dw++] = 0x00000000; | |
494 | } | |
8128765c CZ |
495 | |
496 | ib->ptr[ib->length_dw++] = 0x00000014; /* len */ | |
497 | ib->ptr[ib->length_dw++] = 0x05000005; /* feedback buffer */ | |
569557e5 AD |
498 | ib->ptr[ib->length_dw++] = upper_32_bits(addr); |
499 | ib->ptr[ib->length_dw++] = addr; | |
8128765c CZ |
500 | ib->ptr[ib->length_dw++] = 0x00000001; |
501 | ||
502 | for (i = ib->length_dw; i < ib_size_dw; ++i) | |
503 | ib->ptr[i] = 0x0; | |
504 | ||
ee913fd9 | 505 | r = amdgpu_job_submit_direct(job, ring, &f); |
0014952b | 506 | amdgpu_ib_free(&ib_msg, f); |
8128765c CZ |
507 | if (r) |
508 | goto err; | |
9f2ade33 | 509 | |
d38ceaf9 | 510 | if (fence) |
f54d1867 CW |
511 | *fence = dma_fence_get(f); |
512 | dma_fence_put(f); | |
cadf97b1 | 513 | return 0; |
d71518b5 | 514 | |
8128765c | 515 | err: |
d71518b5 | 516 | amdgpu_job_free(job); |
d38ceaf9 AD |
517 | return r; |
518 | } | |
519 | ||
520 | /** | |
521 | * amdgpu_vce_get_destroy_msg - generate a VCE destroy msg | |
522 | * | |
d38ceaf9 AD |
523 | * @ring: ring we should submit the msg to |
524 | * @handle: VCE session handle to use | |
184b762d | 525 | * @direct: direct or delayed pool |
d38ceaf9 AD |
526 | * @fence: optional fence to return |
527 | * | |
528 | * Close up a stream for HW test or if userspace failed to do so | |
529 | */ | |
17523bd0 AD |
530 | static int amdgpu_vce_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle, |
531 | bool direct, struct dma_fence **fence) | |
d38ceaf9 | 532 | { |
f10984a3 | 533 | const unsigned int ib_size_dw = 1024; |
d71518b5 CK |
534 | struct amdgpu_job *job; |
535 | struct amdgpu_ib *ib; | |
f54d1867 | 536 | struct dma_fence *f = NULL; |
d38ceaf9 AD |
537 | int i, r; |
538 | ||
f7d66fb2 CK |
539 | r = amdgpu_job_alloc_with_ib(ring->adev, &ring->adev->vce.entity, |
540 | AMDGPU_FENCE_OWNER_UNDEFINED, | |
541 | ib_size_dw * 4, | |
9ecefb19 CK |
542 | direct ? AMDGPU_IB_POOL_DIRECT : |
543 | AMDGPU_IB_POOL_DELAYED, &job); | |
d71518b5 | 544 | if (r) |
d38ceaf9 | 545 | return r; |
d38ceaf9 | 546 | |
d71518b5 | 547 | ib = &job->ibs[0]; |
d38ceaf9 AD |
548 | |
549 | /* stitch together an VCE destroy msg */ | |
8128765c CZ |
550 | ib->length_dw = 0; |
551 | ib->ptr[ib->length_dw++] = 0x0000000c; /* len */ | |
552 | ib->ptr[ib->length_dw++] = 0x00000001; /* session cmd */ | |
553 | ib->ptr[ib->length_dw++] = handle; | |
554 | ||
99453a9e RZ |
555 | ib->ptr[ib->length_dw++] = 0x00000020; /* len */ |
556 | ib->ptr[ib->length_dw++] = 0x00000002; /* task info */ | |
557 | ib->ptr[ib->length_dw++] = 0xffffffff; /* next task info, set to 0xffffffff if no */ | |
558 | ib->ptr[ib->length_dw++] = 0x00000001; /* destroy session */ | |
559 | ib->ptr[ib->length_dw++] = 0x00000000; | |
560 | ib->ptr[ib->length_dw++] = 0x00000000; | |
561 | ib->ptr[ib->length_dw++] = 0xffffffff; /* feedback is not needed, set to 0xffffffff and firmware will not output feedback */ | |
562 | ib->ptr[ib->length_dw++] = 0x00000000; | |
8128765c CZ |
563 | |
564 | ib->ptr[ib->length_dw++] = 0x00000008; /* len */ | |
565 | ib->ptr[ib->length_dw++] = 0x02000001; /* destroy cmd */ | |
566 | ||
567 | for (i = ib->length_dw; i < ib_size_dw; ++i) | |
568 | ib->ptr[i] = 0x0; | |
9f2ade33 | 569 | |
ee913fd9 CK |
570 | if (direct) |
571 | r = amdgpu_job_submit_direct(job, ring, &f); | |
572 | else | |
f7d66fb2 | 573 | f = amdgpu_job_submit(job); |
ee913fd9 CK |
574 | if (r) |
575 | goto err; | |
9f2ade33 | 576 | |
d38ceaf9 | 577 | if (fence) |
f54d1867 CW |
578 | *fence = dma_fence_get(f); |
579 | dma_fence_put(f); | |
cadf97b1 | 580 | return 0; |
d71518b5 | 581 | |
8128765c | 582 | err: |
d71518b5 | 583 | amdgpu_job_free(job); |
d38ceaf9 AD |
584 | return r; |
585 | } | |
586 | ||
23594318 | 587 | /** |
1c7f15c7 | 588 | * amdgpu_vce_validate_bo - make sure not to cross 4GB boundary |
23594318 | 589 | * |
a37558e6 | 590 | * @p: cs parser |
cdc7893f | 591 | * @ib: indirect buffer to use |
23594318 CK |
592 | * @lo: address of lower dword |
593 | * @hi: address of higher dword | |
594 | * @size: minimum size | |
595 | * @index: bs/fb index | |
596 | * | |
597 | * Make sure that no BO cross a 4GB boundary. | |
598 | */ | |
cdc7893f CK |
599 | static int amdgpu_vce_validate_bo(struct amdgpu_cs_parser *p, |
600 | struct amdgpu_ib *ib, int lo, int hi, | |
f10984a3 | 601 | unsigned int size, int32_t index) |
23594318 CK |
602 | { |
603 | int64_t offset = ((uint64_t)size) * ((int64_t)index); | |
19be5570 | 604 | struct ttm_operation_ctx ctx = { false, false }; |
23594318 | 605 | struct amdgpu_bo_va_mapping *mapping; |
f10984a3 | 606 | unsigned int i, fpfn, lpfn; |
23594318 CK |
607 | struct amdgpu_bo *bo; |
608 | uint64_t addr; | |
609 | int r; | |
610 | ||
cdc7893f CK |
611 | addr = ((uint64_t)amdgpu_ib_get_value(ib, lo)) | |
612 | ((uint64_t)amdgpu_ib_get_value(ib, hi)) << 32; | |
23594318 CK |
613 | if (index >= 0) { |
614 | addr += offset; | |
615 | fpfn = PAGE_ALIGN(offset) >> PAGE_SHIFT; | |
616 | lpfn = 0x100000000ULL >> PAGE_SHIFT; | |
617 | } else { | |
618 | fpfn = 0; | |
619 | lpfn = (0x100000000ULL - PAGE_ALIGN(offset)) >> PAGE_SHIFT; | |
620 | } | |
621 | ||
622 | r = amdgpu_cs_find_mapping(p, addr, &bo, &mapping); | |
623 | if (r) { | |
f10984a3 | 624 | DRM_ERROR("Can't find BO for addr 0x%010llx %d %d %d %d\n", |
23594318 CK |
625 | addr, lo, hi, size, index); |
626 | return r; | |
627 | } | |
628 | ||
629 | for (i = 0; i < bo->placement.num_placement; ++i) { | |
630 | bo->placements[i].fpfn = max(bo->placements[i].fpfn, fpfn); | |
4c63abb2 CK |
631 | bo->placements[i].lpfn = bo->placements[i].lpfn ? |
632 | min(bo->placements[i].lpfn, lpfn) : lpfn; | |
23594318 | 633 | } |
19be5570 | 634 | return ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); |
23594318 CK |
635 | } |
636 | ||
637 | ||
d38ceaf9 AD |
638 | /** |
639 | * amdgpu_vce_cs_reloc - command submission relocation | |
640 | * | |
641 | * @p: parser context | |
cdc7893f | 642 | * @ib: indirect buffer to use |
d38ceaf9 AD |
643 | * @lo: address of lower dword |
644 | * @hi: address of higher dword | |
f1689ec1 | 645 | * @size: minimum size |
184b762d | 646 | * @index: bs/fb index |
d38ceaf9 AD |
647 | * |
648 | * Patch relocation inside command stream with real buffer address | |
649 | */ | |
cdc7893f | 650 | static int amdgpu_vce_cs_reloc(struct amdgpu_cs_parser *p, struct amdgpu_ib *ib, |
f10984a3 | 651 | int lo, int hi, unsigned int size, uint32_t index) |
d38ceaf9 AD |
652 | { |
653 | struct amdgpu_bo_va_mapping *mapping; | |
d38ceaf9 AD |
654 | struct amdgpu_bo *bo; |
655 | uint64_t addr; | |
9cca0b8e | 656 | int r; |
d38ceaf9 | 657 | |
dc78330a CK |
658 | if (index == 0xffffffff) |
659 | index = 0; | |
660 | ||
cdc7893f CK |
661 | addr = ((uint64_t)amdgpu_ib_get_value(ib, lo)) | |
662 | ((uint64_t)amdgpu_ib_get_value(ib, hi)) << 32; | |
dc78330a | 663 | addr += ((uint64_t)size) * ((uint64_t)index); |
d38ceaf9 | 664 | |
9cca0b8e CK |
665 | r = amdgpu_cs_find_mapping(p, addr, &bo, &mapping); |
666 | if (r) { | |
f10984a3 | 667 | DRM_ERROR("Can't find BO for addr 0x%010llx %d %d %d %d\n", |
dc78330a | 668 | addr, lo, hi, size, index); |
9cca0b8e | 669 | return r; |
d38ceaf9 AD |
670 | } |
671 | ||
f1689ec1 | 672 | if ((addr + (uint64_t)size) > |
a9f87f64 | 673 | (mapping->last + 1) * AMDGPU_GPU_PAGE_SIZE) { |
f10984a3 | 674 | DRM_ERROR("BO too small for addr 0x%010llx %d %d\n", |
f1689ec1 CK |
675 | addr, lo, hi); |
676 | return -EINVAL; | |
677 | } | |
678 | ||
a9f87f64 | 679 | addr -= mapping->start * AMDGPU_GPU_PAGE_SIZE; |
d38ceaf9 | 680 | addr += amdgpu_bo_gpu_offset(bo); |
dc78330a | 681 | addr -= ((uint64_t)size) * ((uint64_t)index); |
d38ceaf9 | 682 | |
cdc7893f CK |
683 | amdgpu_ib_set_value(ib, lo, lower_32_bits(addr)); |
684 | amdgpu_ib_set_value(ib, hi, upper_32_bits(addr)); | |
d38ceaf9 AD |
685 | |
686 | return 0; | |
687 | } | |
688 | ||
f1689ec1 CK |
689 | /** |
690 | * amdgpu_vce_validate_handle - validate stream handle | |
691 | * | |
692 | * @p: parser context | |
693 | * @handle: handle to validate | |
2f4b9368 | 694 | * @allocated: allocated a new handle? |
f1689ec1 CK |
695 | * |
696 | * Validates the handle and return the found session index or -EINVAL | |
f10984a3 | 697 | * we don't have another free session index. |
f1689ec1 CK |
698 | */ |
699 | static int amdgpu_vce_validate_handle(struct amdgpu_cs_parser *p, | |
e5223214 | 700 | uint32_t handle, uint32_t *allocated) |
f1689ec1 | 701 | { |
f10984a3 | 702 | unsigned int i; |
f1689ec1 CK |
703 | |
704 | /* validate the handle */ | |
705 | for (i = 0; i < AMDGPU_MAX_VCE_HANDLES; ++i) { | |
2f4b9368 CK |
706 | if (atomic_read(&p->adev->vce.handles[i]) == handle) { |
707 | if (p->adev->vce.filp[i] != p->filp) { | |
708 | DRM_ERROR("VCE handle collision detected!\n"); | |
709 | return -EINVAL; | |
710 | } | |
f1689ec1 | 711 | return i; |
2f4b9368 | 712 | } |
f1689ec1 CK |
713 | } |
714 | ||
715 | /* handle not found try to alloc a new one */ | |
716 | for (i = 0; i < AMDGPU_MAX_VCE_HANDLES; ++i) { | |
717 | if (!atomic_cmpxchg(&p->adev->vce.handles[i], 0, handle)) { | |
718 | p->adev->vce.filp[i] = p->filp; | |
719 | p->adev->vce.img_size[i] = 0; | |
e5223214 | 720 | *allocated |= 1 << i; |
f1689ec1 CK |
721 | return i; |
722 | } | |
723 | } | |
724 | ||
725 | DRM_ERROR("No more free VCE handles!\n"); | |
726 | return -EINVAL; | |
727 | } | |
728 | ||
d38ceaf9 | 729 | /** |
1c7f15c7 | 730 | * amdgpu_vce_ring_parse_cs - parse and validate the command stream |
d38ceaf9 AD |
731 | * |
732 | * @p: parser context | |
cdc7893f CK |
733 | * @job: the job to parse |
734 | * @ib: the IB to patch | |
d38ceaf9 | 735 | */ |
cdc7893f CK |
736 | int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, |
737 | struct amdgpu_job *job, | |
738 | struct amdgpu_ib *ib) | |
d38ceaf9 | 739 | { |
f10984a3 | 740 | unsigned int fb_idx = 0, bs_idx = 0; |
f1689ec1 | 741 | int session_idx = -1; |
e5223214 CK |
742 | uint32_t destroyed = 0; |
743 | uint32_t created = 0; | |
744 | uint32_t allocated = 0; | |
f1689ec1 | 745 | uint32_t tmp, handle = 0; |
88a9a467 JZ |
746 | uint32_t dummy = 0xffffffff; |
747 | uint32_t *size = &dummy; | |
f10984a3 | 748 | unsigned int idx; |
23594318 | 749 | int i, r = 0; |
c855e250 | 750 | |
cdc7893f | 751 | job->vm = NULL; |
45088efc | 752 | |
23594318 | 753 | for (idx = 0; idx < ib->length_dw;) { |
cdc7893f CK |
754 | uint32_t len = amdgpu_ib_get_value(ib, idx); |
755 | uint32_t cmd = amdgpu_ib_get_value(ib, idx + 1); | |
d38ceaf9 AD |
756 | |
757 | if ((len < 8) || (len & 3)) { | |
758 | DRM_ERROR("invalid VCE command length (%d)!\n", len); | |
2f4b9368 CK |
759 | r = -EINVAL; |
760 | goto out; | |
d38ceaf9 AD |
761 | } |
762 | ||
23594318 CK |
763 | switch (cmd) { |
764 | case 0x00000002: /* task info */ | |
cdc7893f CK |
765 | fb_idx = amdgpu_ib_get_value(ib, idx + 6); |
766 | bs_idx = amdgpu_ib_get_value(ib, idx + 7); | |
23594318 CK |
767 | break; |
768 | ||
769 | case 0x03000001: /* encode */ | |
cdc7893f CK |
770 | r = amdgpu_vce_validate_bo(p, ib, idx + 10, idx + 9, |
771 | 0, 0); | |
23594318 CK |
772 | if (r) |
773 | goto out; | |
774 | ||
cdc7893f CK |
775 | r = amdgpu_vce_validate_bo(p, ib, idx + 12, idx + 11, |
776 | 0, 0); | |
23594318 CK |
777 | if (r) |
778 | goto out; | |
779 | break; | |
780 | ||
781 | case 0x05000001: /* context buffer */ | |
cdc7893f CK |
782 | r = amdgpu_vce_validate_bo(p, ib, idx + 3, idx + 2, |
783 | 0, 0); | |
23594318 CK |
784 | if (r) |
785 | goto out; | |
786 | break; | |
787 | ||
788 | case 0x05000004: /* video bitstream buffer */ | |
cdc7893f CK |
789 | tmp = amdgpu_ib_get_value(ib, idx + 4); |
790 | r = amdgpu_vce_validate_bo(p, ib, idx + 3, idx + 2, | |
23594318 CK |
791 | tmp, bs_idx); |
792 | if (r) | |
793 | goto out; | |
794 | break; | |
795 | ||
796 | case 0x05000005: /* feedback buffer */ | |
cdc7893f | 797 | r = amdgpu_vce_validate_bo(p, ib, idx + 3, idx + 2, |
23594318 CK |
798 | 4096, fb_idx); |
799 | if (r) | |
800 | goto out; | |
801 | break; | |
1eb1547f JZ |
802 | |
803 | case 0x0500000d: /* MV buffer */ | |
cdc7893f CK |
804 | r = amdgpu_vce_validate_bo(p, ib, idx + 3, idx + 2, |
805 | 0, 0); | |
1eb1547f JZ |
806 | if (r) |
807 | goto out; | |
808 | ||
cdc7893f CK |
809 | r = amdgpu_vce_validate_bo(p, ib, idx + 8, idx + 7, |
810 | 0, 0); | |
1eb1547f JZ |
811 | if (r) |
812 | goto out; | |
813 | break; | |
23594318 CK |
814 | } |
815 | ||
816 | idx += len / 4; | |
817 | } | |
818 | ||
819 | for (idx = 0; idx < ib->length_dw;) { | |
cdc7893f CK |
820 | uint32_t len = amdgpu_ib_get_value(ib, idx); |
821 | uint32_t cmd = amdgpu_ib_get_value(ib, idx + 1); | |
23594318 | 822 | |
d38ceaf9 | 823 | switch (cmd) { |
182830a1 | 824 | case 0x00000001: /* session */ |
cdc7893f | 825 | handle = amdgpu_ib_get_value(ib, idx + 2); |
2f4b9368 CK |
826 | session_idx = amdgpu_vce_validate_handle(p, handle, |
827 | &allocated); | |
e5223214 CK |
828 | if (session_idx < 0) { |
829 | r = session_idx; | |
830 | goto out; | |
831 | } | |
f1689ec1 | 832 | size = &p->adev->vce.img_size[session_idx]; |
d38ceaf9 AD |
833 | break; |
834 | ||
182830a1 | 835 | case 0x00000002: /* task info */ |
cdc7893f CK |
836 | fb_idx = amdgpu_ib_get_value(ib, idx + 6); |
837 | bs_idx = amdgpu_ib_get_value(ib, idx + 7); | |
f1689ec1 CK |
838 | break; |
839 | ||
182830a1 | 840 | case 0x01000001: /* create */ |
e5223214 CK |
841 | created |= 1 << session_idx; |
842 | if (destroyed & (1 << session_idx)) { | |
843 | destroyed &= ~(1 << session_idx); | |
844 | allocated |= 1 << session_idx; | |
845 | ||
846 | } else if (!(allocated & (1 << session_idx))) { | |
2f4b9368 CK |
847 | DRM_ERROR("Handle already in use!\n"); |
848 | r = -EINVAL; | |
849 | goto out; | |
850 | } | |
851 | ||
cdc7893f CK |
852 | *size = amdgpu_ib_get_value(ib, idx + 8) * |
853 | amdgpu_ib_get_value(ib, idx + 10) * | |
f1689ec1 CK |
854 | 8 * 3 / 2; |
855 | break; | |
856 | ||
182830a1 CK |
857 | case 0x04000001: /* config extension */ |
858 | case 0x04000002: /* pic control */ | |
859 | case 0x04000005: /* rate control */ | |
860 | case 0x04000007: /* motion estimation */ | |
861 | case 0x04000008: /* rdo */ | |
862 | case 0x04000009: /* vui */ | |
863 | case 0x05000002: /* auxiliary buffer */ | |
4f827785 | 864 | case 0x05000009: /* clock table */ |
d38ceaf9 AD |
865 | break; |
866 | ||
5eeda8a4 AD |
867 | case 0x0500000c: /* hw config */ |
868 | switch (p->adev->asic_type) { | |
869 | #ifdef CONFIG_DRM_AMDGPU_CIK | |
870 | case CHIP_KAVERI: | |
871 | case CHIP_MULLINS: | |
872 | #endif | |
873 | case CHIP_CARRIZO: | |
874 | break; | |
875 | default: | |
876 | r = -EINVAL; | |
877 | goto out; | |
878 | } | |
879 | break; | |
880 | ||
182830a1 | 881 | case 0x03000001: /* encode */ |
cdc7893f | 882 | r = amdgpu_vce_cs_reloc(p, ib, idx + 10, idx + 9, |
dc78330a | 883 | *size, 0); |
d38ceaf9 | 884 | if (r) |
2f4b9368 | 885 | goto out; |
d38ceaf9 | 886 | |
cdc7893f | 887 | r = amdgpu_vce_cs_reloc(p, ib, idx + 12, idx + 11, |
dc78330a | 888 | *size / 3, 0); |
d38ceaf9 | 889 | if (r) |
2f4b9368 | 890 | goto out; |
d38ceaf9 AD |
891 | break; |
892 | ||
182830a1 | 893 | case 0x02000001: /* destroy */ |
e5223214 | 894 | destroyed |= 1 << session_idx; |
d38ceaf9 AD |
895 | break; |
896 | ||
182830a1 | 897 | case 0x05000001: /* context buffer */ |
cdc7893f | 898 | r = amdgpu_vce_cs_reloc(p, ib, idx + 3, idx + 2, |
dc78330a | 899 | *size * 2, 0); |
f1689ec1 | 900 | if (r) |
2f4b9368 | 901 | goto out; |
f1689ec1 CK |
902 | break; |
903 | ||
182830a1 | 904 | case 0x05000004: /* video bitstream buffer */ |
cdc7893f CK |
905 | tmp = amdgpu_ib_get_value(ib, idx + 4); |
906 | r = amdgpu_vce_cs_reloc(p, ib, idx + 3, idx + 2, | |
dc78330a | 907 | tmp, bs_idx); |
f1689ec1 | 908 | if (r) |
2f4b9368 | 909 | goto out; |
f1689ec1 CK |
910 | break; |
911 | ||
182830a1 | 912 | case 0x05000005: /* feedback buffer */ |
cdc7893f | 913 | r = amdgpu_vce_cs_reloc(p, ib, idx + 3, idx + 2, |
dc78330a | 914 | 4096, fb_idx); |
d38ceaf9 | 915 | if (r) |
2f4b9368 | 916 | goto out; |
d38ceaf9 AD |
917 | break; |
918 | ||
1eb1547f | 919 | case 0x0500000d: /* MV buffer */ |
cdc7893f CK |
920 | r = amdgpu_vce_cs_reloc(p, ib, idx + 3, |
921 | idx + 2, *size, 0); | |
1eb1547f JZ |
922 | if (r) |
923 | goto out; | |
924 | ||
cdc7893f CK |
925 | r = amdgpu_vce_cs_reloc(p, ib, idx + 8, |
926 | idx + 7, *size / 12, 0); | |
1eb1547f JZ |
927 | if (r) |
928 | goto out; | |
929 | break; | |
930 | ||
d38ceaf9 AD |
931 | default: |
932 | DRM_ERROR("invalid VCE command (0x%x)!\n", cmd); | |
2f4b9368 CK |
933 | r = -EINVAL; |
934 | goto out; | |
d38ceaf9 AD |
935 | } |
936 | ||
f1689ec1 CK |
937 | if (session_idx == -1) { |
938 | DRM_ERROR("no session command at start of IB\n"); | |
2f4b9368 CK |
939 | r = -EINVAL; |
940 | goto out; | |
f1689ec1 CK |
941 | } |
942 | ||
d38ceaf9 AD |
943 | idx += len / 4; |
944 | } | |
945 | ||
e5223214 | 946 | if (allocated & ~created) { |
2f4b9368 CK |
947 | DRM_ERROR("New session without create command!\n"); |
948 | r = -ENOENT; | |
949 | } | |
950 | ||
951 | out: | |
e5223214 CK |
952 | if (!r) { |
953 | /* No error, free all destroyed handle slots */ | |
954 | tmp = destroyed; | |
955 | } else { | |
956 | /* Error during parsing, free all allocated handle slots */ | |
957 | tmp = allocated; | |
d38ceaf9 AD |
958 | } |
959 | ||
e5223214 CK |
960 | for (i = 0; i < AMDGPU_MAX_VCE_HANDLES; ++i) |
961 | if (tmp & (1 << i)) | |
962 | atomic_set(&p->adev->vce.handles[i], 0); | |
963 | ||
2f4b9368 | 964 | return r; |
d38ceaf9 AD |
965 | } |
966 | ||
98614701 | 967 | /** |
1c7f15c7 | 968 | * amdgpu_vce_ring_parse_cs_vm - parse the command stream in VM mode |
98614701 CK |
969 | * |
970 | * @p: parser context | |
cdc7893f CK |
971 | * @job: the job to parse |
972 | * @ib: the IB to patch | |
98614701 | 973 | */ |
cdc7893f CK |
974 | int amdgpu_vce_ring_parse_cs_vm(struct amdgpu_cs_parser *p, |
975 | struct amdgpu_job *job, | |
976 | struct amdgpu_ib *ib) | |
98614701 | 977 | { |
98614701 CK |
978 | int session_idx = -1; |
979 | uint32_t destroyed = 0; | |
980 | uint32_t created = 0; | |
981 | uint32_t allocated = 0; | |
982 | uint32_t tmp, handle = 0; | |
983 | int i, r = 0, idx = 0; | |
984 | ||
985 | while (idx < ib->length_dw) { | |
cdc7893f CK |
986 | uint32_t len = amdgpu_ib_get_value(ib, idx); |
987 | uint32_t cmd = amdgpu_ib_get_value(ib, idx + 1); | |
98614701 CK |
988 | |
989 | if ((len < 8) || (len & 3)) { | |
990 | DRM_ERROR("invalid VCE command length (%d)!\n", len); | |
991 | r = -EINVAL; | |
992 | goto out; | |
993 | } | |
994 | ||
995 | switch (cmd) { | |
996 | case 0x00000001: /* session */ | |
cdc7893f | 997 | handle = amdgpu_ib_get_value(ib, idx + 2); |
98614701 CK |
998 | session_idx = amdgpu_vce_validate_handle(p, handle, |
999 | &allocated); | |
1000 | if (session_idx < 0) { | |
1001 | r = session_idx; | |
1002 | goto out; | |
1003 | } | |
1004 | break; | |
1005 | ||
1006 | case 0x01000001: /* create */ | |
1007 | created |= 1 << session_idx; | |
1008 | if (destroyed & (1 << session_idx)) { | |
1009 | destroyed &= ~(1 << session_idx); | |
1010 | allocated |= 1 << session_idx; | |
1011 | ||
1012 | } else if (!(allocated & (1 << session_idx))) { | |
1013 | DRM_ERROR("Handle already in use!\n"); | |
1014 | r = -EINVAL; | |
1015 | goto out; | |
1016 | } | |
1017 | ||
1018 | break; | |
1019 | ||
1020 | case 0x02000001: /* destroy */ | |
1021 | destroyed |= 1 << session_idx; | |
1022 | break; | |
1023 | ||
1024 | default: | |
1025 | break; | |
1026 | } | |
1027 | ||
1028 | if (session_idx == -1) { | |
1029 | DRM_ERROR("no session command at start of IB\n"); | |
1030 | r = -EINVAL; | |
1031 | goto out; | |
1032 | } | |
1033 | ||
1034 | idx += len / 4; | |
1035 | } | |
1036 | ||
1037 | if (allocated & ~created) { | |
1038 | DRM_ERROR("New session without create command!\n"); | |
1039 | r = -ENOENT; | |
1040 | } | |
1041 | ||
1042 | out: | |
1043 | if (!r) { | |
1044 | /* No error, free all destroyed handle slots */ | |
1045 | tmp = destroyed; | |
98614701 CK |
1046 | } else { |
1047 | /* Error during parsing, free all allocated handle slots */ | |
1048 | tmp = allocated; | |
1049 | } | |
1050 | ||
1051 | for (i = 0; i < AMDGPU_MAX_VCE_HANDLES; ++i) | |
1052 | if (tmp & (1 << i)) | |
1053 | atomic_set(&p->adev->vce.handles[i], 0); | |
1054 | ||
1055 | return r; | |
1056 | } | |
1057 | ||
d38ceaf9 AD |
1058 | /** |
1059 | * amdgpu_vce_ring_emit_ib - execute indirect buffer | |
1060 | * | |
1061 | * @ring: engine to use | |
184b762d | 1062 | * @job: job to retrieve vmid from |
d38ceaf9 | 1063 | * @ib: the IB to execute |
184b762d | 1064 | * @flags: unused |
d38ceaf9 AD |
1065 | * |
1066 | */ | |
34955e03 RZ |
1067 | void amdgpu_vce_ring_emit_ib(struct amdgpu_ring *ring, |
1068 | struct amdgpu_job *job, | |
1069 | struct amdgpu_ib *ib, | |
c4c905ec | 1070 | uint32_t flags) |
d38ceaf9 AD |
1071 | { |
1072 | amdgpu_ring_write(ring, VCE_CMD_IB); | |
1073 | amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr)); | |
1074 | amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr)); | |
1075 | amdgpu_ring_write(ring, ib->length_dw); | |
1076 | } | |
1077 | ||
1078 | /** | |
1079 | * amdgpu_vce_ring_emit_fence - add a fence command to the ring | |
1080 | * | |
1081 | * @ring: engine to use | |
184b762d LJ |
1082 | * @addr: address |
1083 | * @seq: sequence number | |
1084 | * @flags: fence related flags | |
d38ceaf9 AD |
1085 | * |
1086 | */ | |
1087 | void amdgpu_vce_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq, | |
f10984a3 | 1088 | unsigned int flags) |
d38ceaf9 | 1089 | { |
890ee23f | 1090 | WARN_ON(flags & AMDGPU_FENCE_FLAG_64BIT); |
d38ceaf9 AD |
1091 | |
1092 | amdgpu_ring_write(ring, VCE_CMD_FENCE); | |
1093 | amdgpu_ring_write(ring, addr); | |
1094 | amdgpu_ring_write(ring, upper_32_bits(addr)); | |
1095 | amdgpu_ring_write(ring, seq); | |
1096 | amdgpu_ring_write(ring, VCE_CMD_TRAP); | |
1097 | amdgpu_ring_write(ring, VCE_CMD_END); | |
1098 | } | |
1099 | ||
1100 | /** | |
1101 | * amdgpu_vce_ring_test_ring - test if VCE ring is working | |
1102 | * | |
1103 | * @ring: the engine to test on | |
1104 | * | |
1105 | */ | |
1106 | int amdgpu_vce_ring_test_ring(struct amdgpu_ring *ring) | |
1107 | { | |
1108 | struct amdgpu_device *adev = ring->adev; | |
ce0e22f5 | 1109 | uint32_t rptr; |
f10984a3 | 1110 | unsigned int i; |
a2f537e0 XY |
1111 | int r, timeout = adev->usec_timeout; |
1112 | ||
a1b9022a | 1113 | /* skip ring test for sriov*/ |
a2f537e0 | 1114 | if (amdgpu_sriov_vf(adev)) |
a1b9022a | 1115 | return 0; |
d38ceaf9 | 1116 | |
a27de35c | 1117 | r = amdgpu_ring_alloc(ring, 16); |
dc9eeff8 | 1118 | if (r) |
d38ceaf9 | 1119 | return r; |
dc9eeff8 | 1120 | |
ce0e22f5 LL |
1121 | rptr = amdgpu_ring_get_rptr(ring); |
1122 | ||
d38ceaf9 | 1123 | amdgpu_ring_write(ring, VCE_CMD_END); |
a27de35c | 1124 | amdgpu_ring_commit(ring); |
d38ceaf9 | 1125 | |
a2f537e0 | 1126 | for (i = 0; i < timeout; i++) { |
d38ceaf9 AD |
1127 | if (amdgpu_ring_get_rptr(ring) != rptr) |
1128 | break; | |
c366be54 | 1129 | udelay(1); |
d38ceaf9 AD |
1130 | } |
1131 | ||
dc9eeff8 | 1132 | if (i >= timeout) |
d38ceaf9 | 1133 | r = -ETIMEDOUT; |
d38ceaf9 AD |
1134 | |
1135 | return r; | |
1136 | } | |
1137 | ||
1138 | /** | |
1139 | * amdgpu_vce_ring_test_ib - test if VCE IBs are working | |
1140 | * | |
1141 | * @ring: the engine to test on | |
184b762d | 1142 | * @timeout: timeout value in jiffies, or MAX_SCHEDULE_TIMEOUT |
d38ceaf9 AD |
1143 | * |
1144 | */ | |
bbec97aa | 1145 | int amdgpu_vce_ring_test_ib(struct amdgpu_ring *ring, long timeout) |
d38ceaf9 | 1146 | { |
f54d1867 | 1147 | struct dma_fence *fence = NULL; |
bbec97aa | 1148 | long r; |
d38ceaf9 | 1149 | |
6f0359ff AD |
1150 | /* skip vce ring1/2 ib test for now, since it's not reliable */ |
1151 | if (ring != &ring->adev->vce.ring[0]) | |
898e50d4 LL |
1152 | return 0; |
1153 | ||
cb9038aa | 1154 | r = amdgpu_vce_get_create_msg(ring, 1, NULL); |
98079389 | 1155 | if (r) |
d38ceaf9 | 1156 | goto error; |
d38ceaf9 | 1157 | |
9f2ade33 | 1158 | r = amdgpu_vce_get_destroy_msg(ring, 1, true, &fence); |
98079389 | 1159 | if (r) |
d38ceaf9 | 1160 | goto error; |
d38ceaf9 | 1161 | |
f54d1867 | 1162 | r = dma_fence_wait_timeout(fence, false, timeout); |
98079389 | 1163 | if (r == 0) |
bbec97aa | 1164 | r = -ETIMEDOUT; |
98079389 | 1165 | else if (r > 0) |
bbec97aa | 1166 | r = 0; |
98079389 | 1167 | |
d38ceaf9 | 1168 | error: |
f54d1867 | 1169 | dma_fence_put(fence); |
d38ceaf9 AD |
1170 | return r; |
1171 | } | |
080e613c SS |
1172 | |
1173 | enum amdgpu_ring_priority_level amdgpu_vce_get_ring_prio(int ring) | |
1174 | { | |
f10984a3 | 1175 | switch (ring) { |
080e613c SS |
1176 | case 0: |
1177 | return AMDGPU_RING_PRIO_0; | |
1178 | case 1: | |
1179 | return AMDGPU_RING_PRIO_1; | |
1180 | case 2: | |
1181 | return AMDGPU_RING_PRIO_2; | |
1182 | default: | |
1183 | return AMDGPU_RING_PRIO_0; | |
1184 | } | |
1185 | } |