/*
 * Copyright 2016 Advanced Micro Devices, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 */

#include <linux/firmware.h>
#include <drm/drmP.h>
#include "amdgpu.h"
#include "amdgpu_vce.h"
#include "soc15.h"
#include "soc15d.h"
#include "soc15_common.h"
#include "mmsch_v1_0.h"

#include "vce/vce_4_0_offset.h"
#include "vce/vce_4_0_default.h"
#include "vce/vce_4_0_sh_mask.h"
#include "mmhub/mmhub_1_0_offset.h"
#include "mmhub/mmhub_1_0_sh_mask.h"

#define VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK   0x02

#define VCE_V4_0_FW_SIZE        (384 * 1024)
#define VCE_V4_0_STACK_SIZE     (64 * 1024)
#define VCE_V4_0_DATA_SIZE      ((16 * 1024 * AMDGPU_MAX_VCE_HANDLES) + (52 * 1024))

static void vce_v4_0_mc_resume(struct amdgpu_device *adev);
static void vce_v4_0_set_ring_funcs(struct amdgpu_device *adev);
static void vce_v4_0_set_irq_funcs(struct amdgpu_device *adev);

/**
 * vce_v4_0_ring_get_rptr - get read pointer
 *
 * @ring: amdgpu_ring pointer
 *
 * Returns the current hardware read pointer
 */
static uint64_t vce_v4_0_ring_get_rptr(struct amdgpu_ring *ring)
{
        struct amdgpu_device *adev = ring->adev;

        if (ring == &adev->vce.ring[0])
                return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR));
        else if (ring == &adev->vce.ring[1])
                return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR2));
        else
                return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR3));
}

/**
 * vce_v4_0_ring_get_wptr - get write pointer
 *
 * @ring: amdgpu_ring pointer
 *
 * Returns the current hardware write pointer
 */
static uint64_t vce_v4_0_ring_get_wptr(struct amdgpu_ring *ring)
{
        struct amdgpu_device *adev = ring->adev;

        if (ring->use_doorbell)
                return adev->wb.wb[ring->wptr_offs];

        if (ring == &adev->vce.ring[0])
                return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR));
        else if (ring == &adev->vce.ring[1])
                return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR2));
        else
                return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR3));
}

/**
 * vce_v4_0_ring_set_wptr - set write pointer
 *
 * @ring: amdgpu_ring pointer
 *
 * Commits the write pointer to the hardware
 */
static void vce_v4_0_ring_set_wptr(struct amdgpu_ring *ring)
{
        struct amdgpu_device *adev = ring->adev;

        if (ring->use_doorbell) {
                /* XXX check if swapping is necessary on BE */
                adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
                WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
                return;
        }

        if (ring == &adev->vce.ring[0])
                WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR),
                        lower_32_bits(ring->wptr));
        else if (ring == &adev->vce.ring[1])
                WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR2),
                        lower_32_bits(ring->wptr));
        else
                WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR3),
                        lower_32_bits(ring->wptr));
}

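/**
 * vce_v4_0_firmware_loaded - wait for the VCE firmware to boot
 *
 * @adev: amdgpu_device pointer
 *
 * Poll VCE_STATUS until the VCPU reports the firmware as loaded,
 * soft resetting the ECPU between retries.  Returns 0 on success,
 * -ETIMEDOUT if the firmware never comes up.
 */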
static int vce_v4_0_firmware_loaded(struct amdgpu_device *adev)
{
        int i, j;

        for (i = 0; i < 10; ++i) {
                for (j = 0; j < 100; ++j) {
                        uint32_t status =
                                RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS));

                        if (status & VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK)
                                return 0;
                        mdelay(10);
                }

                DRM_ERROR("VCE not responding, trying to reset the ECPU!!!\n");
                WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET),
                                VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK,
                                ~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
                mdelay(10);
                WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET), 0,
                                ~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
                mdelay(10);
        }

        return -ETIMEDOUT;
}

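/**
 * vce_v4_0_mmsch_start - kick off MMSCH initialization
 *
 * @adev: amdgpu_device pointer
 * @table: init table describing the register programming sequence
 *
 * Hand the GPU address and size of the init table to the MMSCH,
 * ring the mailbox to start execution and wait for the MMSCH to
 * acknowledge completion of the VCE setup.
 */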
static int vce_v4_0_mmsch_start(struct amdgpu_device *adev,
                                struct amdgpu_mm_table *table)
{
        uint32_t data = 0, loop;
        uint64_t addr = table->gpu_addr;
        struct mmsch_v1_0_init_header *header = (struct mmsch_v1_0_init_header *)table->cpu_addr;
        uint32_t size;

        size = header->header_size + header->vce_table_size + header->uvd_table_size;

        /* 1, write to vce_mmsch_vf_ctx_addr_lo/hi register with GPU mc addr of memory descriptor location */
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_CTX_ADDR_LO), lower_32_bits(addr));
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_CTX_ADDR_HI), upper_32_bits(addr));

        /* 2, update vmid of descriptor */
        data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_VMID));
        data &= ~VCE_MMSCH_VF_VMID__VF_CTX_VMID_MASK;
        data |= (0 << VCE_MMSCH_VF_VMID__VF_CTX_VMID__SHIFT); /* use domain0 for MM scheduler */
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_VMID), data);

        /* 3, notify mmsch about the size of this descriptor */
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_CTX_SIZE), size);

        /* 4, set resp to zero */
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP), 0);

        WDOORBELL32(adev->vce.ring[0].doorbell_index, 0);
        adev->wb.wb[adev->vce.ring[0].wptr_offs] = 0;
        adev->vce.ring[0].wptr = 0;
        adev->vce.ring[0].wptr_old = 0;

        /* 5, kick off the initialization and wait until VCE_MMSCH_VF_MAILBOX_RESP becomes non-zero */
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_HOST), 0x10000001);

        data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP));
        loop = 1000;
        while ((data & 0x10000002) != 0x10000002) {
                udelay(10);
                data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP));
                loop--;
                if (!loop)
                        break;
        }

        if (!loop) {
                dev_err(adev->dev, "failed to init MMSCH, mmVCE_MMSCH_VF_MAILBOX_RESP = %x\n", data);
                return -EBUSY;
        }

        return 0;
}

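/**
 * vce_v4_0_sriov_start - start VCE block under SRIOV
 *
 * @adev: amdgpu_device pointer
 *
 * Build the init table for the MMSCH: ring buffer setup, the MC_RESUME
 * register programming and a poll for the firmware-loaded status, then
 * submit it with vce_v4_0_mmsch_start().
 */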
static int vce_v4_0_sriov_start(struct amdgpu_device *adev)
{
        struct amdgpu_ring *ring;
        uint32_t offset, size;
        uint32_t table_size = 0;
        struct mmsch_v1_0_cmd_direct_write direct_wt = { { 0 } };
        struct mmsch_v1_0_cmd_direct_read_modify_write direct_rd_mod_wt = { { 0 } };
        struct mmsch_v1_0_cmd_direct_polling direct_poll = { { 0 } };
        struct mmsch_v1_0_cmd_end end = { { 0 } };
        uint32_t *init_table = adev->virt.mm_table.cpu_addr;
        struct mmsch_v1_0_init_header *header = (struct mmsch_v1_0_init_header *)init_table;

        direct_wt.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_WRITE;
        direct_rd_mod_wt.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_READ_MODIFY_WRITE;
        direct_poll.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_POLLING;
        end.cmd_header.command_type = MMSCH_COMMAND__END;

        if (header->vce_table_offset == 0 && header->vce_table_size == 0) {
                header->version = MMSCH_VERSION;
                header->header_size = sizeof(struct mmsch_v1_0_init_header) >> 2;

                if (header->uvd_table_offset == 0 && header->uvd_table_size == 0)
                        header->vce_table_offset = header->header_size;
                else
                        header->vce_table_offset = header->uvd_table_size + header->uvd_table_offset;

                init_table += header->vce_table_offset;

                ring = &adev->vce.ring[0];
                MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO),
                                            lower_32_bits(ring->gpu_addr));
                MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI),
                                            upper_32_bits(ring->gpu_addr));
                MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE),
                                            ring->ring_size / 4);

                /* begin of MC_RESUME */
                MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL), 0x398000);
                MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CACHE_CTRL), ~0x1, 0);
                MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL), 0);
                MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL1), 0);
                MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VM_CTRL), 0);

                if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
                        MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
                                                mmVCE_LMI_VCPU_CACHE_40BIT_BAR0),
                                                adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].mc_addr >> 8);
                        MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
                                                mmVCE_LMI_VCPU_CACHE_64BIT_BAR0),
                                                (adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].mc_addr >> 40) & 0xff);
                } else {
                        MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
                                                mmVCE_LMI_VCPU_CACHE_40BIT_BAR0),
                                                adev->vce.gpu_addr >> 8);
                        MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
                                                mmVCE_LMI_VCPU_CACHE_64BIT_BAR0),
                                                (adev->vce.gpu_addr >> 40) & 0xff);
                }
                MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
                                                mmVCE_LMI_VCPU_CACHE_40BIT_BAR1),
                                                adev->vce.gpu_addr >> 8);
                MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
                                                mmVCE_LMI_VCPU_CACHE_64BIT_BAR1),
                                                (adev->vce.gpu_addr >> 40) & 0xff);
                MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
                                                mmVCE_LMI_VCPU_CACHE_40BIT_BAR2),
                                                adev->vce.gpu_addr >> 8);
                MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
                                                mmVCE_LMI_VCPU_CACHE_64BIT_BAR2),
                                                (adev->vce.gpu_addr >> 40) & 0xff);

                offset = AMDGPU_VCE_FIRMWARE_OFFSET;
                size = VCE_V4_0_FW_SIZE;
                MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET0),
                                        offset & ~0x0f000000);
                MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE0), size);

                offset = (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) ? offset + size : 0;
                size = VCE_V4_0_STACK_SIZE;
                MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET1),
                                        (offset & ~0x0f000000) | (1 << 24));
                MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE1), size);

                offset += size;
                size = VCE_V4_0_DATA_SIZE;
                MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET2),
                                        (offset & ~0x0f000000) | (2 << 24));
                MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE2), size);

                MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL2), ~0x100, 0);
                MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_SYS_INT_EN),
                                                   VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK,
                                                   VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK);

                /* end of MC_RESUME */
                MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS),
                                                   VCE_STATUS__JOB_BUSY_MASK, ~VCE_STATUS__JOB_BUSY_MASK);
                MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CNTL),
                                                   ~0x200001, VCE_VCPU_CNTL__CLK_EN_MASK);
                MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET),
                                                   ~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK, 0);

                MMSCH_V1_0_INSERT_DIRECT_POLL(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS),
                                              VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK,
                                              VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK);

                /* clear BUSY flag */
                MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS),
                                                   ~VCE_STATUS__JOB_BUSY_MASK, 0);

                /* add end packet */
                memcpy((void *)init_table, &end, sizeof(struct mmsch_v1_0_cmd_end));
                table_size += sizeof(struct mmsch_v1_0_cmd_end) / 4;
                header->vce_table_size = table_size;
        }

        return vce_v4_0_mmsch_start(adev, &adev->virt.mm_table);
}

/**
 * vce_v4_0_start - start VCE block
 *
 * @adev: amdgpu_device pointer
 *
 * Setup and start the VCE block
 */
static int vce_v4_0_start(struct amdgpu_device *adev)
{
        struct amdgpu_ring *ring;
        int r;

        ring = &adev->vce.ring[0];

        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR), lower_32_bits(ring->wptr));
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR), lower_32_bits(ring->wptr));
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO), ring->gpu_addr);
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI), upper_32_bits(ring->gpu_addr));
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE), ring->ring_size / 4);

        ring = &adev->vce.ring[1];

        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR2), lower_32_bits(ring->wptr));
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR2), lower_32_bits(ring->wptr));
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO2), ring->gpu_addr);
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI2), upper_32_bits(ring->gpu_addr));
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE2), ring->ring_size / 4);

        ring = &adev->vce.ring[2];

        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR3), lower_32_bits(ring->wptr));
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR3), lower_32_bits(ring->wptr));
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO3), ring->gpu_addr);
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI3), upper_32_bits(ring->gpu_addr));
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE3), ring->ring_size / 4);

        vce_v4_0_mc_resume(adev);
        WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS), VCE_STATUS__JOB_BUSY_MASK,
                        ~VCE_STATUS__JOB_BUSY_MASK);

        WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CNTL), 1, ~0x200001);

        WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET), 0,
                        ~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
        mdelay(100);

        r = vce_v4_0_firmware_loaded(adev);

        /* clear BUSY flag */
        WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS), 0, ~VCE_STATUS__JOB_BUSY_MASK);

        if (r) {
                DRM_ERROR("VCE not responding, giving up!!!\n");
                return r;
        }

        return 0;
}

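/**
 * vce_v4_0_stop - stop VCE block
 *
 * @adev: amdgpu_device pointer
 *
 * Disable the VCPU, hold the ECPU in soft reset and clear the
 * BUSY flag.
 */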
static int vce_v4_0_stop(struct amdgpu_device *adev)
{
        WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CNTL), 0, ~0x200001);

        /* hold on ECPU */
        WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET),
                        VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK,
                        ~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);

        /* clear BUSY flag */
        WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS), 0, ~VCE_STATUS__JOB_BUSY_MASK);

        /* Set Clock-Gating off */
        /* if (adev->cg_flags & AMD_CG_SUPPORT_VCE_MGCG)
         *        vce_v4_0_set_vce_sw_clock_gating(adev, false);
         */

        return 0;
}

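/**
 * vce_v4_0_early_init - set ring and irq functions
 *
 * @handle: amdgpu_device pointer
 *
 * Pick the number of rings (one under SRIOV, three otherwise) and
 * install the ring and interrupt handling callbacks.
 */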
static int vce_v4_0_early_init(void *handle)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;

        if (amdgpu_sriov_vf(adev)) /* currently only VCE0 supports SRIOV */
                adev->vce.num_rings = 1;
        else
                adev->vce.num_rings = 3;

        vce_v4_0_set_ring_funcs(adev);
        vce_v4_0_set_irq_funcs(adev);

        return 0;
}

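/**
 * vce_v4_0_sw_init - sw init for VCE block
 *
 * @handle: amdgpu_device pointer
 *
 * Register the interrupt source, set up the firmware and VCPU buffer,
 * initialize the rings and allocate the SRIOV MM table.
 */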
static int vce_v4_0_sw_init(void *handle)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
        struct amdgpu_ring *ring;
        unsigned size;
        int r, i;

        r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCE0, 167, &adev->vce.irq);
        if (r)
                return r;

        size = VCE_V4_0_STACK_SIZE + VCE_V4_0_DATA_SIZE;
        if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP)
                size += VCE_V4_0_FW_SIZE;

        r = amdgpu_vce_sw_init(adev, size);
        if (r)
                return r;

        if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
                const struct common_firmware_header *hdr;
                unsigned size = amdgpu_bo_size(adev->vce.vcpu_bo);

                adev->vce.saved_bo = kmalloc(size, GFP_KERNEL);
                if (!adev->vce.saved_bo)
                        return -ENOMEM;

                hdr = (const struct common_firmware_header *)adev->vce.fw->data;
                adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].ucode_id = AMDGPU_UCODE_ID_VCE;
                adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].fw = adev->vce.fw;
                adev->firmware.fw_size +=
                        ALIGN(le32_to_cpu(hdr->ucode_size_bytes), PAGE_SIZE);
                DRM_INFO("PSP loading VCE firmware\n");
        } else {
                r = amdgpu_vce_resume(adev);
                if (r)
                        return r;
        }

        for (i = 0; i < adev->vce.num_rings; i++) {
                ring = &adev->vce.ring[i];
                sprintf(ring->name, "vce%d", i);
                if (amdgpu_sriov_vf(adev)) {
                        /* DOORBELL only works under SRIOV */
                        ring->use_doorbell = true;

                        /* currently only use the first encoding ring for sriov,
                         * so set unused location for other unused rings.
                         */
                        if (i == 0)
                                ring->doorbell_index = AMDGPU_DOORBELL64_VCE_RING0_1 * 2;
                        else
                                ring->doorbell_index = AMDGPU_DOORBELL64_VCE_RING2_3 * 2 + 1;
                }
                r = amdgpu_ring_init(adev, ring, 512, &adev->vce.irq, 0);
                if (r)
                        return r;
        }

        r = amdgpu_virt_alloc_mm_table(adev);
        if (r)
                return r;

        return r;
}

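/**
 * vce_v4_0_sw_fini - sw fini for VCE block
 *
 * @handle: amdgpu_device pointer
 *
 * Free the MM table and the saved firmware buffer and tear down the
 * resources allocated in sw_init.
 */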
static int vce_v4_0_sw_fini(void *handle)
{
        int r;
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;

        /* free MM table */
        amdgpu_virt_free_mm_table(adev);

        if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
                kfree(adev->vce.saved_bo);
                adev->vce.saved_bo = NULL;
        }

        r = amdgpu_vce_suspend(adev);
        if (r)
                return r;

        return amdgpu_vce_sw_fini(adev);
}

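/**
 * vce_v4_0_hw_init - start and test VCE block
 *
 * @handle: amdgpu_device pointer
 *
 * Start the block (through the MMSCH under SRIOV) and run the
 * ring tests to mark the rings as ready.
 */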
static int vce_v4_0_hw_init(void *handle)
{
        int r, i;
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;

        if (amdgpu_sriov_vf(adev))
                r = vce_v4_0_sriov_start(adev);
        else
                r = vce_v4_0_start(adev);
        if (r)
                return r;

        for (i = 0; i < adev->vce.num_rings; i++)
                adev->vce.ring[i].ready = false;

        for (i = 0; i < adev->vce.num_rings; i++) {
                r = amdgpu_ring_test_ring(&adev->vce.ring[i]);
                if (r)
                        return r;
                else
                        adev->vce.ring[i].ready = true;
        }

        DRM_INFO("VCE initialized successfully.\n");

        return 0;
}

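/**
 * vce_v4_0_hw_fini - stop the VCE block
 *
 * @handle: amdgpu_device pointer
 *
 * Stop the hardware on bare metal (VCE registers must not be touched
 * under SRIOV) and mark all rings as not ready.
 */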
static int vce_v4_0_hw_fini(void *handle)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
        int i;

        if (!amdgpu_sriov_vf(adev)) {
                /* vce_v4_0_wait_for_idle(handle); */
                vce_v4_0_stop(adev);
        } else {
                /* full access mode, so don't touch any VCE register */
                DRM_DEBUG("For SRIOV client, shouldn't do anything.\n");
        }

        for (i = 0; i < adev->vce.num_rings; i++)
                adev->vce.ring[i].ready = false;

        return 0;
}

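/**
 * vce_v4_0_suspend - suspend VCE block
 *
 * @handle: amdgpu_device pointer
 *
 * Save the VCPU buffer contents when the firmware was loaded by the
 * PSP, then stop the hardware.
 */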
static int vce_v4_0_suspend(void *handle)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
        int r;

        if (adev->vce.vcpu_bo == NULL)
                return 0;

        if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
                unsigned size = amdgpu_bo_size(adev->vce.vcpu_bo);
                void *ptr = adev->vce.cpu_addr;

                memcpy_fromio(adev->vce.saved_bo, ptr, size);
        }

        r = vce_v4_0_hw_fini(adev);
        if (r)
                return r;

        return amdgpu_vce_suspend(adev);
}

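/**
 * vce_v4_0_resume - resume VCE block
 *
 * @handle: amdgpu_device pointer
 *
 * Restore the VCPU buffer contents saved at suspend time and
 * re-initialize the hardware.
 */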
static int vce_v4_0_resume(void *handle)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
        int r;

        if (adev->vce.vcpu_bo == NULL)
                return -EINVAL;

        if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
                unsigned size = amdgpu_bo_size(adev->vce.vcpu_bo);
                void *ptr = adev->vce.cpu_addr;

                memcpy_toio(ptr, adev->vce.saved_bo, size);
        } else {
                r = amdgpu_vce_resume(adev);
                if (r)
                        return r;
        }

        return vce_v4_0_hw_init(adev);
}

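/**
 * vce_v4_0_mc_resume - memory controller programming
 *
 * @adev: amdgpu_device pointer
 *
 * Tell the VCE memory controller where the firmware, stack and data
 * regions live and enable the system interrupt.
 */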
static void vce_v4_0_mc_resume(struct amdgpu_device *adev)
{
        uint32_t offset, size;

        WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_A), 0, ~(1 << 16));
        WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), 0x1FF000, ~0xFF9FF000);
        WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING), 0x3F, ~0x3F);
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B), 0x1FF);

        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL), 0x00398000);
        WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CACHE_CTRL), 0x0, ~0x1);
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL), 0);
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL1), 0);
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VM_CTRL), 0);

        if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
                WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR0),
                        (adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].mc_addr >> 8));
                WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR0),
                        (adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].mc_addr >> 40) & 0xff);
        } else {
                WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR0),
                        (adev->vce.gpu_addr >> 8));
                WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR0),
                        (adev->vce.gpu_addr >> 40) & 0xff);
        }

        offset = AMDGPU_VCE_FIRMWARE_OFFSET;
        size = VCE_V4_0_FW_SIZE;
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET0), offset & ~0x0f000000);
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE0), size);

        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR1), (adev->vce.gpu_addr >> 8));
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR1), (adev->vce.gpu_addr >> 40) & 0xff);
        offset = (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) ? offset + size : 0;
        size = VCE_V4_0_STACK_SIZE;
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET1), (offset & ~0x0f000000) | (1 << 24));
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE1), size);

        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR2), (adev->vce.gpu_addr >> 8));
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR2), (adev->vce.gpu_addr >> 40) & 0xff);
        offset += size;
        size = VCE_V4_0_DATA_SIZE;
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET2), (offset & ~0x0f000000) | (2 << 24));
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE2), size);

        WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL2), 0x0, ~0x100);
        WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SYS_INT_EN),
                        VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK,
                        ~VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK);
}

static int vce_v4_0_set_clockgating_state(void *handle,
                                          enum amd_clockgating_state state)
{
        /* needed for driver unload */
        return 0;
}

#if 0
static bool vce_v4_0_is_idle(void *handle)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
        u32 mask = 0;

        mask |= (adev->vce.harvest_config & AMDGPU_VCE_HARVEST_VCE0) ? 0 : SRBM_STATUS2__VCE0_BUSY_MASK;
        mask |= (adev->vce.harvest_config & AMDGPU_VCE_HARVEST_VCE1) ? 0 : SRBM_STATUS2__VCE1_BUSY_MASK;

        return !(RREG32(mmSRBM_STATUS2) & mask);
}

static int vce_v4_0_wait_for_idle(void *handle)
{
        unsigned i;
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;

        for (i = 0; i < adev->usec_timeout; i++)
                if (vce_v4_0_is_idle(handle))
                        return 0;

        return -ETIMEDOUT;
}

#define  VCE_STATUS_VCPU_REPORT_AUTO_BUSY_MASK  0x00000008L   /* AUTO_BUSY */
#define  VCE_STATUS_VCPU_REPORT_RB0_BUSY_MASK   0x00000010L   /* RB0_BUSY */
#define  VCE_STATUS_VCPU_REPORT_RB1_BUSY_MASK   0x00000020L   /* RB1_BUSY */
#define  AMDGPU_VCE_STATUS_BUSY_MASK (VCE_STATUS_VCPU_REPORT_AUTO_BUSY_MASK | \
                                      VCE_STATUS_VCPU_REPORT_RB0_BUSY_MASK)

static bool vce_v4_0_check_soft_reset(void *handle)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
        u32 srbm_soft_reset = 0;

        /* According to the VCE team, we should use VCE_STATUS instead of
         * the SRBM_STATUS.VCE_BUSY bit for busy status checking.
         * GRBM_GFX_INDEX.INSTANCE_INDEX is used to specify which VCE
         * instance's registers are accessed
         * (0 for the 1st instance, 0x10 for the 2nd instance).
         *
         * VCE_STATUS
         * |UENC|ACPI|AUTO ACTIVE|RB1 |RB0 |RB2 |          |FW_LOADED|JOB |
         * |----+----+-----------+----+----+----+----------+---------+----|
         * |bit8|bit7|    bit6   |bit5|bit4|bit3|   bit2   |  bit1   |bit0|
         *
         * The VCE team suggests using bits 3 to 6 for the busy status check.
         */
        mutex_lock(&adev->grbm_idx_mutex);
        WREG32_FIELD(GRBM_GFX_INDEX, INSTANCE_INDEX, 0);
        if (RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS)) & AMDGPU_VCE_STATUS_BUSY_MASK) {
                srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE0, 1);
                srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE1, 1);
        }
        WREG32_FIELD(GRBM_GFX_INDEX, INSTANCE_INDEX, 0x10);
        if (RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS)) & AMDGPU_VCE_STATUS_BUSY_MASK) {
                srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE0, 1);
                srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE1, 1);
        }
        WREG32_FIELD(GRBM_GFX_INDEX, INSTANCE_INDEX, 0);
        mutex_unlock(&adev->grbm_idx_mutex);

        if (srbm_soft_reset) {
                adev->vce.srbm_soft_reset = srbm_soft_reset;
                return true;
        } else {
                adev->vce.srbm_soft_reset = 0;
                return false;
        }
}

static int vce_v4_0_soft_reset(void *handle)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
        u32 srbm_soft_reset;

        if (!adev->vce.srbm_soft_reset)
                return 0;
        srbm_soft_reset = adev->vce.srbm_soft_reset;

        if (srbm_soft_reset) {
                u32 tmp;

                tmp = RREG32(mmSRBM_SOFT_RESET);
                tmp |= srbm_soft_reset;
                dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
                WREG32(mmSRBM_SOFT_RESET, tmp);
                tmp = RREG32(mmSRBM_SOFT_RESET);

                udelay(50);

                tmp &= ~srbm_soft_reset;
                WREG32(mmSRBM_SOFT_RESET, tmp);
                tmp = RREG32(mmSRBM_SOFT_RESET);

                /* Wait a little for things to settle down */
                udelay(50);
        }

        return 0;
}

static int vce_v4_0_pre_soft_reset(void *handle)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;

        if (!adev->vce.srbm_soft_reset)
                return 0;

        mdelay(5);

        return vce_v4_0_suspend(adev);
}

static int vce_v4_0_post_soft_reset(void *handle)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;

        if (!adev->vce.srbm_soft_reset)
                return 0;

        mdelay(5);

        return vce_v4_0_resume(adev);
}

static void vce_v4_0_override_vce_clock_gating(struct amdgpu_device *adev, bool override)
{
        u32 tmp, data;

        tmp = data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_ARB_CTRL));
        if (override)
                data |= VCE_RB_ARB_CTRL__VCE_CGTT_OVERRIDE_MASK;
        else
                data &= ~VCE_RB_ARB_CTRL__VCE_CGTT_OVERRIDE_MASK;

        if (tmp != data)
                WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_ARB_CTRL), data);
}

static void vce_v4_0_set_vce_sw_clock_gating(struct amdgpu_device *adev,
                                             bool gated)
{
        u32 data;

        /* Set Override to disable Clock Gating */
        vce_v4_0_override_vce_clock_gating(adev, true);

        /* This function enables MGCG which is controlled by firmware.
         * With the clocks in the gated state the core is still
         * accessible but the firmware will throttle the clocks on the
         * fly as necessary.
         */
        if (gated) {
                data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B));
                data |= 0x1ff;
                data &= ~0xef0000;
                WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B), data);

                data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING));
                data |= 0x3ff000;
                data &= ~0xffc00000;
                WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), data);

                data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2));
                data |= 0x2;
                data &= ~0x00010000;
                WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2), data);

                data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING));
                data |= 0x37f;
                WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING), data);

                data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL));
                data |= VCE_UENC_DMA_DCLK_CTRL__WRDMCLK_FORCEON_MASK |
                        VCE_UENC_DMA_DCLK_CTRL__RDDMCLK_FORCEON_MASK |
                        VCE_UENC_DMA_DCLK_CTRL__REGCLK_FORCEON_MASK  |
                        0x8;
                WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL), data);
        } else {
                data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B));
                data &= ~0x80010;
                data |= 0xe70008;
                WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B), data);

                data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING));
                data |= 0xffc00000;
                WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), data);

                data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2));
                data |= 0x10000;
                WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2), data);

                data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING));
                data &= ~0xffc00000;
                WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING), data);

                data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL));
                data &= ~(VCE_UENC_DMA_DCLK_CTRL__WRDMCLK_FORCEON_MASK |
                          VCE_UENC_DMA_DCLK_CTRL__RDDMCLK_FORCEON_MASK |
                          VCE_UENC_DMA_DCLK_CTRL__REGCLK_FORCEON_MASK  |
                          0x8);
                WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL), data);
        }
        vce_v4_0_override_vce_clock_gating(adev, false);
}

static void vce_v4_0_set_bypass_mode(struct amdgpu_device *adev, bool enable)
{
        u32 tmp = RREG32_SMC(ixGCK_DFS_BYPASS_CNTL);

        if (enable)
                tmp |= GCK_DFS_BYPASS_CNTL__BYPASSECLK_MASK;
        else
                tmp &= ~GCK_DFS_BYPASS_CNTL__BYPASSECLK_MASK;

        WREG32_SMC(ixGCK_DFS_BYPASS_CNTL, tmp);
}

static int vce_v4_0_set_clockgating_state(void *handle,
                                          enum amd_clockgating_state state)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
        bool enable = (state == AMD_CG_STATE_GATE);
        int i;

        if ((adev->asic_type == CHIP_POLARIS10) ||
                (adev->asic_type == CHIP_TONGA) ||
                (adev->asic_type == CHIP_FIJI))
                vce_v4_0_set_bypass_mode(adev, enable);

        if (!(adev->cg_flags & AMD_CG_SUPPORT_VCE_MGCG))
                return 0;

        mutex_lock(&adev->grbm_idx_mutex);
        for (i = 0; i < 2; i++) {
                /* Program VCE Instance 0 or 1 if not harvested */
                if (adev->vce.harvest_config & (1 << i))
                        continue;

                WREG32_FIELD(GRBM_GFX_INDEX, VCE_INSTANCE, i);

                if (enable) {
                        /* initialize VCE_CLOCK_GATING_A: Clock ON/OFF delay */
                        uint32_t data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_A));
                        data &= ~(0xf | 0xff0);
                        data |= ((0x0 << 0) | (0x04 << 4));
                        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_A), data);

                        /* initialize VCE_UENC_CLOCK_GATING: Clock ON/OFF delay */
                        data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING));
                        data &= ~(0xf | 0xff0);
                        data |= ((0x0 << 0) | (0x04 << 4));
                        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), data);
                }

                vce_v4_0_set_vce_sw_clock_gating(adev, enable);
        }

        WREG32_FIELD(GRBM_GFX_INDEX, VCE_INSTANCE, 0);
        mutex_unlock(&adev->grbm_idx_mutex);

        return 0;
}

static int vce_v4_0_set_powergating_state(void *handle,
                                          enum amd_powergating_state state)
{
        /* This doesn't actually powergate the VCE block.
         * That's done in the dpm code via the SMC.  This
         * just re-inits the block as necessary.  The actual
         * gating still happens in the dpm code.  We should
         * revisit this when there is a cleaner line between
         * the smc and the hw blocks
         */
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;

        if (!(adev->pg_flags & AMD_PG_SUPPORT_VCE))
                return 0;

        if (state == AMD_PG_STATE_GATE)
                /* XXX do we need a vce_v4_0_stop()? */
                return 0;
        else
                return vce_v4_0_start(adev);
}
#endif

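/**
 * vce_v4_0_ring_emit_ib - execute indirect buffer
 *
 * @ring: amdgpu_ring pointer
 * @ib: indirect buffer to execute
 * @vmid: VM id to assign to the IB
 * @ctx_switch: unused for VCE
 *
 * Write the ring commands to execute the indirect buffer.
 */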
static void vce_v4_0_ring_emit_ib(struct amdgpu_ring *ring,
                struct amdgpu_ib *ib, unsigned int vmid, bool ctx_switch)
{
        amdgpu_ring_write(ring, VCE_CMD_IB_VM);
        amdgpu_ring_write(ring, vmid);
        amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr));
        amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
        amdgpu_ring_write(ring, ib->length_dw);
}

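/**
 * vce_v4_0_ring_emit_fence - add a fence command to the ring
 *
 * @ring: amdgpu_ring pointer
 * @addr: GPU address to write the sequence number to
 * @seq: sequence number to emit
 * @flags: fence flags, only 32 bit sequence numbers are supported
 *
 * Write a fence and a trap command to the ring.
 */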
static void vce_v4_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
                        u64 seq, unsigned flags)
{
        WARN_ON(flags & AMDGPU_FENCE_FLAG_64BIT);

        amdgpu_ring_write(ring, VCE_CMD_FENCE);
        amdgpu_ring_write(ring, addr);
        amdgpu_ring_write(ring, upper_32_bits(addr));
        amdgpu_ring_write(ring, seq);
        amdgpu_ring_write(ring, VCE_CMD_TRAP);
}

static void vce_v4_0_ring_insert_end(struct amdgpu_ring *ring)
{
        amdgpu_ring_write(ring, VCE_CMD_END);
}

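/**
 * vce_v4_0_emit_reg_wait - emit a register wait command
 *
 * @ring: amdgpu_ring pointer
 * @reg: register offset in dwords
 * @val: value to wait for
 * @mask: mask to apply to the register value
 *
 * Emit a command that stalls the engine until the masked register
 * matches the expected value.
 */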
static void vce_v4_0_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
                                   uint32_t val, uint32_t mask)
{
        amdgpu_ring_write(ring, VCE_CMD_REG_WAIT);
        amdgpu_ring_write(ring, reg << 2);
        amdgpu_ring_write(ring, mask);
        amdgpu_ring_write(ring, val);
}

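/**
 * vce_v4_0_emit_vm_flush - flush the VM TLB from the ring
 *
 * @ring: amdgpu_ring pointer
 * @vmid: VM id to flush
 * @pd_addr: page directory base address
 *
 * Emit the GMC TLB flush and then wait until the new page table base
 * address is visible in the VM hub before continuing.
 */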
static void vce_v4_0_emit_vm_flush(struct amdgpu_ring *ring,
                                   unsigned int vmid, uint64_t pd_addr)
{
        struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub];

        pd_addr = amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);

        /* wait for reg writes */
        vce_v4_0_emit_reg_wait(ring, hub->ctx0_ptb_addr_lo32 + vmid * 2,
                               lower_32_bits(pd_addr), 0xffffffff);
}

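/**
 * vce_v4_0_emit_wreg - emit a register write command
 *
 * @ring: amdgpu_ring pointer
 * @reg: register offset in dwords
 * @val: value to write
 *
 * Emit a command that makes the engine write @val to @reg.
 */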
static void vce_v4_0_emit_wreg(struct amdgpu_ring *ring,
                               uint32_t reg, uint32_t val)
{
        amdgpu_ring_write(ring, VCE_CMD_REG_WRITE);
        amdgpu_ring_write(ring, reg << 2);
        amdgpu_ring_write(ring, val);
}

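/**
 * vce_v4_0_set_interrupt_state - enable/disable interrupts
 *
 * @adev: amdgpu_device pointer
 * @source: interrupt source this callback belongs to
 * @type: interrupt type, unused as VCE only has one
 * @state: requested interrupt state
 *
 * Toggle the system trap interrupt enable bit; a no-op under SRIOV
 * where the register is not ours to touch.
 */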
static int vce_v4_0_set_interrupt_state(struct amdgpu_device *adev,
                                        struct amdgpu_irq_src *source,
                                        unsigned type,
                                        enum amdgpu_interrupt_state state)
{
        uint32_t val = 0;

        if (!amdgpu_sriov_vf(adev)) {
                if (state == AMDGPU_IRQ_STATE_ENABLE)
                        val |= VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK;

                WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SYS_INT_EN), val,
                                ~VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK);
        }
        return 0;
}

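/**
 * vce_v4_0_process_interrupt - process a VCE interrupt
 *
 * @adev: amdgpu_device pointer
 * @source: interrupt source this callback belongs to
 * @entry: interrupt vector entry
 *
 * Forward the ring trap interrupt to the fence handling code.
 */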
static int vce_v4_0_process_interrupt(struct amdgpu_device *adev,
                                      struct amdgpu_irq_src *source,
                                      struct amdgpu_iv_entry *entry)
{
        DRM_DEBUG("IH: VCE\n");

        switch (entry->src_data[0]) {
        case 0:
        case 1:
        case 2:
                amdgpu_fence_process(&adev->vce.ring[entry->src_data[0]]);
                break;
        default:
                DRM_ERROR("Unhandled interrupt: %d %d\n",
                          entry->src_id, entry->src_data[0]);
                break;
        }

        return 0;
}

const struct amd_ip_funcs vce_v4_0_ip_funcs = {
        .name = "vce_v4_0",
        .early_init = vce_v4_0_early_init,
        .late_init = NULL,
        .sw_init = vce_v4_0_sw_init,
        .sw_fini = vce_v4_0_sw_fini,
        .hw_init = vce_v4_0_hw_init,
        .hw_fini = vce_v4_0_hw_fini,
        .suspend = vce_v4_0_suspend,
        .resume = vce_v4_0_resume,
        .is_idle = NULL /* vce_v4_0_is_idle */,
        .wait_for_idle = NULL /* vce_v4_0_wait_for_idle */,
        .check_soft_reset = NULL /* vce_v4_0_check_soft_reset */,
        .pre_soft_reset = NULL /* vce_v4_0_pre_soft_reset */,
        .soft_reset = NULL /* vce_v4_0_soft_reset */,
        .post_soft_reset = NULL /* vce_v4_0_post_soft_reset */,
        .set_clockgating_state = vce_v4_0_set_clockgating_state,
        .set_powergating_state = NULL /* vce_v4_0_set_powergating_state */,
};

static const struct amdgpu_ring_funcs vce_v4_0_ring_vm_funcs = {
        .type = AMDGPU_RING_TYPE_VCE,
        .align_mask = 0x3f,
        .nop = VCE_CMD_NO_OP,
        .support_64bit_ptrs = false,
        .vmhub = AMDGPU_MMHUB,
        .get_rptr = vce_v4_0_ring_get_rptr,
        .get_wptr = vce_v4_0_ring_get_wptr,
        .set_wptr = vce_v4_0_ring_set_wptr,
        .parse_cs = amdgpu_vce_ring_parse_cs_vm,
        .emit_frame_size =
                SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
                SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 4 +
                4 + /* vce_v4_0_emit_vm_flush */
                5 + 5 + /* amdgpu_vce_ring_emit_fence x2 vm fence */
                1, /* vce_v4_0_ring_insert_end */
        .emit_ib_size = 5, /* vce_v4_0_ring_emit_ib */
        .emit_ib = vce_v4_0_ring_emit_ib,
        .emit_vm_flush = vce_v4_0_emit_vm_flush,
        .emit_fence = vce_v4_0_ring_emit_fence,
        .test_ring = amdgpu_vce_ring_test_ring,
        .test_ib = amdgpu_vce_ring_test_ib,
        .insert_nop = amdgpu_ring_insert_nop,
        .insert_end = vce_v4_0_ring_insert_end,
        .pad_ib = amdgpu_ring_generic_pad_ib,
        .begin_use = amdgpu_vce_ring_begin_use,
        .end_use = amdgpu_vce_ring_end_use,
        .emit_wreg = vce_v4_0_emit_wreg,
        .emit_reg_wait = vce_v4_0_emit_reg_wait,
        .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
};

static void vce_v4_0_set_ring_funcs(struct amdgpu_device *adev)
{
        int i;

        for (i = 0; i < adev->vce.num_rings; i++)
                adev->vce.ring[i].funcs = &vce_v4_0_ring_vm_funcs;
        DRM_INFO("VCE enabled in VM mode\n");
}

static const struct amdgpu_irq_src_funcs vce_v4_0_irq_funcs = {
        .set = vce_v4_0_set_interrupt_state,
        .process = vce_v4_0_process_interrupt,
};

static void vce_v4_0_set_irq_funcs(struct amdgpu_device *adev)
{
        adev->vce.irq.num_types = 1;
        adev->vce.irq.funcs = &vce_v4_0_irq_funcs;
}

const struct amdgpu_ip_block_version vce_v4_0_ip_block =
{
        .type = AMD_IP_BLOCK_TYPE_VCE,
        .major = 4,
        .minor = 0,
        .rev = 0,
        .funcs = &vce_v4_0_ip_funcs,
};