drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
1 /*
2  * Copyright 2021 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  */
23 #include <linux/delay.h>
24 #include <linux/kernel.h>
25 #include <linux/firmware.h>
26 #include <linux/module.h>
27 #include <linux/pci.h>
28 #include "amdgpu.h"
29 #include "amdgpu_gfx.h"
30 #include "amdgpu_psp.h"
31 #include "amdgpu_smu.h"
32 #include "amdgpu_atomfirmware.h"
33 #include "imu_v11_0.h"
34 #include "soc21.h"
35 #include "nvd.h"
36
37 #include "gc/gc_11_0_0_offset.h"
38 #include "gc/gc_11_0_0_sh_mask.h"
39 #include "smuio/smuio_13_0_6_offset.h"
40 #include "smuio/smuio_13_0_6_sh_mask.h"
41 #include "navi10_enum.h"
42 #include "ivsrcid/gfx/irqsrcs_gfx_11_0_0.h"
43
44 #include "soc15.h"
45 #include "soc15d.h"
46 #include "clearstate_gfx11.h"
47 #include "v11_structs.h"
48 #include "gfx_v11_0.h"
49 #include "nbio_v4_3.h"
50 #include "mes_v11_0.h"
51
52 #define GFX11_NUM_GFX_RINGS             1
53 #define GFX11_MEC_HPD_SIZE      2048
54
55 #define RLCG_UCODE_LOADING_START_ADDRESS        0x00002000L
56 #define RLC_PG_DELAY_3_DEFAULT_GC_11_0_1        0x1388
57
58 #define regCGTT_WD_CLK_CTRL             0x5086
59 #define regCGTT_WD_CLK_CTRL_BASE_IDX    1
60 #define regRLC_RLCS_BOOTLOAD_STATUS_gc_11_0_1   0x4e7e
61 #define regRLC_RLCS_BOOTLOAD_STATUS_gc_11_0_1_BASE_IDX  1
62
63 MODULE_FIRMWARE("amdgpu/gc_11_0_0_pfp.bin");
64 MODULE_FIRMWARE("amdgpu/gc_11_0_0_me.bin");
65 MODULE_FIRMWARE("amdgpu/gc_11_0_0_mec.bin");
66 MODULE_FIRMWARE("amdgpu/gc_11_0_0_rlc.bin");
67 MODULE_FIRMWARE("amdgpu/gc_11_0_0_toc.bin");
68 MODULE_FIRMWARE("amdgpu/gc_11_0_1_pfp.bin");
69 MODULE_FIRMWARE("amdgpu/gc_11_0_1_me.bin");
70 MODULE_FIRMWARE("amdgpu/gc_11_0_1_mec.bin");
71 MODULE_FIRMWARE("amdgpu/gc_11_0_1_rlc.bin");
72 MODULE_FIRMWARE("amdgpu/gc_11_0_2_pfp.bin");
73 MODULE_FIRMWARE("amdgpu/gc_11_0_2_me.bin");
74 MODULE_FIRMWARE("amdgpu/gc_11_0_2_mec.bin");
75 MODULE_FIRMWARE("amdgpu/gc_11_0_2_rlc.bin");
76 MODULE_FIRMWARE("amdgpu/gc_11_0_3_pfp.bin");
77 MODULE_FIRMWARE("amdgpu/gc_11_0_3_me.bin");
78 MODULE_FIRMWARE("amdgpu/gc_11_0_3_mec.bin");
79 MODULE_FIRMWARE("amdgpu/gc_11_0_3_rlc.bin");
80 MODULE_FIRMWARE("amdgpu/gc_11_0_4_pfp.bin");
81 MODULE_FIRMWARE("amdgpu/gc_11_0_4_me.bin");
82 MODULE_FIRMWARE("amdgpu/gc_11_0_4_mec.bin");
83 MODULE_FIRMWARE("amdgpu/gc_11_0_4_rlc.bin");
84
85 static const struct soc15_reg_golden golden_settings_gc_11_0_1[] =
86 {
87         SOC15_REG_GOLDEN_VALUE(GC, 0, regCGTT_GS_NGG_CLK_CTRL, 0x9fff8fff, 0x00000010),
88         SOC15_REG_GOLDEN_VALUE(GC, 0, regCGTT_WD_CLK_CTRL, 0xffff8fff, 0x00000010),
89         SOC15_REG_GOLDEN_VALUE(GC, 0, regCPF_GCR_CNTL, 0x0007ffff, 0x0000c200),
90         SOC15_REG_GOLDEN_VALUE(GC, 0, regGL2C_CTRL3, 0xffff001b, 0x00f01988),
91         SOC15_REG_GOLDEN_VALUE(GC, 0, regPA_CL_ENHANCE, 0xf0ffffff, 0x00880007),
92         SOC15_REG_GOLDEN_VALUE(GC, 0, regPA_SC_ENHANCE_3, 0xfffffffd, 0x00000008),
93         SOC15_REG_GOLDEN_VALUE(GC, 0, regPA_SC_VRS_SURFACE_CNTL_1, 0xfff891ff, 0x55480100),
94         SOC15_REG_GOLDEN_VALUE(GC, 0, regTA_CNTL_AUX, 0xf7f7ffff, 0x01030000),
95         SOC15_REG_GOLDEN_VALUE(GC, 0, regTCP_CNTL2, 0xfcffffff, 0x0000000a)
96 };
97
98 #define DEFAULT_SH_MEM_CONFIG \
99         ((SH_MEM_ADDRESS_MODE_64 << SH_MEM_CONFIG__ADDRESS_MODE__SHIFT) | \
100          (SH_MEM_ALIGNMENT_MODE_UNALIGNED << SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT) | \
101          (3 << SH_MEM_CONFIG__INITIAL_INST_PREFETCH__SHIFT))
102
103 static void gfx_v11_0_disable_gpa_mode(struct amdgpu_device *adev);
104 static void gfx_v11_0_set_ring_funcs(struct amdgpu_device *adev);
105 static void gfx_v11_0_set_irq_funcs(struct amdgpu_device *adev);
106 static void gfx_v11_0_set_gds_init(struct amdgpu_device *adev);
107 static void gfx_v11_0_set_rlc_funcs(struct amdgpu_device *adev);
108 static void gfx_v11_0_set_mqd_funcs(struct amdgpu_device *adev);
109 static void gfx_v11_0_set_imu_funcs(struct amdgpu_device *adev);
110 static int gfx_v11_0_get_cu_info(struct amdgpu_device *adev,
111                                  struct amdgpu_cu_info *cu_info);
112 static uint64_t gfx_v11_0_get_gpu_clock_counter(struct amdgpu_device *adev);
113 static void gfx_v11_0_select_se_sh(struct amdgpu_device *adev, u32 se_num,
114                                    u32 sh_num, u32 instance);
115 static u32 gfx_v11_0_get_wgp_active_bitmap_per_sh(struct amdgpu_device *adev);
116
117 static void gfx_v11_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume);
118 static void gfx_v11_0_ring_emit_frame_cntl(struct amdgpu_ring *ring, bool start, bool secure);
119 static void gfx_v11_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
120                                      uint32_t val);
121 static int gfx_v11_0_wait_for_rlc_autoload_complete(struct amdgpu_device *adev);
122 static void gfx_v11_0_ring_invalidate_tlbs(struct amdgpu_ring *ring,
123                                            uint16_t pasid, uint32_t flush_type,
124                                            bool all_hub, uint8_t dst_sel);
125 static void gfx_v11_0_set_safe_mode(struct amdgpu_device *adev);
126 static void gfx_v11_0_unset_safe_mode(struct amdgpu_device *adev);
127 static void gfx_v11_0_update_perf_clk(struct amdgpu_device *adev,
128                                       bool enable);
129
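/*
 * Hand the set of compute queues that the KIQ may schedule to the CP via a
 * SET_RESOURCES packet; the GWS, OAC and GDS resource fields are left zero.
 */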
130 static void gfx11_kiq_set_resources(struct amdgpu_ring *kiq_ring, uint64_t queue_mask)
131 {
132         amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
133         amdgpu_ring_write(kiq_ring, PACKET3_SET_RESOURCES_VMID_MASK(0) |
134                           PACKET3_SET_RESOURCES_QUEUE_TYPE(0)); /* vmid_mask:0 queue_type:0 (KIQ) */
135         amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask)); /* queue mask lo */
136         amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask)); /* queue mask hi */
137         amdgpu_ring_write(kiq_ring, 0); /* gws mask lo */
138         amdgpu_ring_write(kiq_ring, 0); /* gws mask hi */
139         amdgpu_ring_write(kiq_ring, 0); /* oac mask */
140         amdgpu_ring_write(kiq_ring, 0); /* gds heap base:0, gds heap size:0 */
141 }
142
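/*
 * Ask the KIQ to map a ring onto the CP. The ME and engine-select fields
 * depend on the ring type (compute, gfx or MES); the packet carries the
 * ring's queue/pipe, doorbell offset, MQD address and write-pointer address.
 */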
143 static void gfx11_kiq_map_queues(struct amdgpu_ring *kiq_ring,
144                                  struct amdgpu_ring *ring)
145 {
146         uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
147         uint64_t wptr_addr = ring->wptr_gpu_addr;
148         uint32_t me = 0, eng_sel = 0;
149
150         switch (ring->funcs->type) {
151         case AMDGPU_RING_TYPE_COMPUTE:
152                 me = 1;
153                 eng_sel = 0;
154                 break;
155         case AMDGPU_RING_TYPE_GFX:
156                 me = 0;
157                 eng_sel = 4;
158                 break;
159         case AMDGPU_RING_TYPE_MES:
160                 me = 2;
161                 eng_sel = 5;
162                 break;
163         default:
164                 WARN_ON(1);
165         }
166
167         amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
168         /* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/
169         amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
170                           PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */
171                           PACKET3_MAP_QUEUES_VMID(0) | /* VMID */
172                           PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
173                           PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
174                           PACKET3_MAP_QUEUES_ME((me)) |
175                           PACKET3_MAP_QUEUES_QUEUE_TYPE(0) | /*queue_type: normal compute queue */
176                           PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) | /* alloc format: all_on_one_pipe */
177                           PACKET3_MAP_QUEUES_ENGINE_SEL(eng_sel) |
178                           PACKET3_MAP_QUEUES_NUM_QUEUES(1)); /* num_queues: must be 1 */
179         amdgpu_ring_write(kiq_ring, PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index));
180         amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
181         amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
182         amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
183         amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
184 }
185
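/*
 * Unmap (or preempt) a ring. When MES is enabled and the KIQ ring is not
 * ready, the request is routed through the MES legacy-queue path instead of
 * a KIQ UNMAP_QUEUES packet. For PREEMPT_QUEUES_NO_UNMAP the trailing dwords
 * carry the supplied GPU address and sequence number; otherwise they are zero.
 */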
186 static void gfx11_kiq_unmap_queues(struct amdgpu_ring *kiq_ring,
187                                    struct amdgpu_ring *ring,
188                                    enum amdgpu_unmap_queues_action action,
189                                    u64 gpu_addr, u64 seq)
190 {
191         struct amdgpu_device *adev = kiq_ring->adev;
192         uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;
193
194         if (adev->enable_mes && !adev->gfx.kiq.ring.sched.ready) {
195                 amdgpu_mes_unmap_legacy_queue(adev, ring, action, gpu_addr, seq);
196                 return;
197         }
198
199         amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
200         amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
201                           PACKET3_UNMAP_QUEUES_ACTION(action) |
202                           PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
203                           PACKET3_UNMAP_QUEUES_ENGINE_SEL(eng_sel) |
204                           PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
205         amdgpu_ring_write(kiq_ring,
206                   PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));
207
208         if (action == PREEMPT_QUEUES_NO_UNMAP) {
209                 amdgpu_ring_write(kiq_ring, lower_32_bits(gpu_addr));
210                 amdgpu_ring_write(kiq_ring, upper_32_bits(gpu_addr));
211                 amdgpu_ring_write(kiq_ring, seq);
212         } else {
213                 amdgpu_ring_write(kiq_ring, 0);
214                 amdgpu_ring_write(kiq_ring, 0);
215                 amdgpu_ring_write(kiq_ring, 0);
216         }
217 }
218
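/*
 * Emit a QUERY_STATUS packet for the given ring, identified by its doorbell
 * offset; the address/sequence pair in the trailing dwords lets the CP report
 * the result back to the driver.
 */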
219 static void gfx11_kiq_query_status(struct amdgpu_ring *kiq_ring,
220                                    struct amdgpu_ring *ring,
221                                    u64 addr,
222                                    u64 seq)
223 {
224         uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;
225
226         amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_QUERY_STATUS, 5));
227         amdgpu_ring_write(kiq_ring,
228                           PACKET3_QUERY_STATUS_CONTEXT_ID(0) |
229                           PACKET3_QUERY_STATUS_INTERRUPT_SEL(0) |
230                           PACKET3_QUERY_STATUS_COMMAND(2));
231         amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
232                           PACKET3_QUERY_STATUS_DOORBELL_OFFSET(ring->doorbell_index) |
233                           PACKET3_QUERY_STATUS_ENG_SEL(eng_sel));
234         amdgpu_ring_write(kiq_ring, lower_32_bits(addr));
235         amdgpu_ring_write(kiq_ring, upper_32_bits(addr));
236         amdgpu_ring_write(kiq_ring, lower_32_bits(seq));
237         amdgpu_ring_write(kiq_ring, upper_32_bits(seq));
238 }
239
240 static void gfx11_kiq_invalidate_tlbs(struct amdgpu_ring *kiq_ring,
241                                 uint16_t pasid, uint32_t flush_type,
242                                 bool all_hub)
243 {
244         gfx_v11_0_ring_invalidate_tlbs(kiq_ring, pasid, flush_type, all_hub, 1);
245 }
246
247 static const struct kiq_pm4_funcs gfx_v11_0_kiq_pm4_funcs = {
248         .kiq_set_resources = gfx11_kiq_set_resources,
249         .kiq_map_queues = gfx11_kiq_map_queues,
250         .kiq_unmap_queues = gfx11_kiq_unmap_queues,
251         .kiq_query_status = gfx11_kiq_query_status,
252         .kiq_invalidate_tlbs = gfx11_kiq_invalidate_tlbs,
253         .set_resources_size = 8,
254         .map_queues_size = 7,
255         .unmap_queues_size = 6,
256         .query_status_size = 7,
257         .invalidate_tlbs_size = 2,
258 };
259
260 static void gfx_v11_0_set_kiq_pm4_funcs(struct amdgpu_device *adev)
261 {
262         adev->gfx.kiq.pmf = &gfx_v11_0_kiq_pm4_funcs;
263 }
264
265 static void gfx_v11_0_init_golden_registers(struct amdgpu_device *adev)
266 {
267         switch (adev->ip_versions[GC_HWIP][0]) {
268         case IP_VERSION(11, 0, 1):
269         case IP_VERSION(11, 0, 4):
270                 soc15_program_register_sequence(adev,
271                                                 golden_settings_gc_11_0_1,
272                                                 (const u32)ARRAY_SIZE(golden_settings_gc_11_0_1));
273                 break;
274         default:
275                 break;
276         }
277 }
278
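/*
 * Write a single register through the ring using a WRITE_DATA packet;
 * 'wc' requests write confirmation before the packet is considered complete.
 */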
279 static void gfx_v11_0_write_data_to_reg(struct amdgpu_ring *ring, int eng_sel,
280                                        bool wc, uint32_t reg, uint32_t val)
281 {
282         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
283         amdgpu_ring_write(ring, WRITE_DATA_ENGINE_SEL(eng_sel) |
284                           WRITE_DATA_DST_SEL(0) | (wc ? WR_CONFIRM : 0));
285         amdgpu_ring_write(ring, reg);
286         amdgpu_ring_write(ring, 0);
287         amdgpu_ring_write(ring, val);
288 }
289
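/*
 * Emit a WAIT_REG_MEM packet that polls a register (mem_space = 0) or a
 * memory location (mem_space = 1) until (value & mask) == ref, using the
 * given poll interval. Memory addresses must be dword aligned.
 */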
290 static void gfx_v11_0_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel,
291                                   int mem_space, int opt, uint32_t addr0,
292                                   uint32_t addr1, uint32_t ref, uint32_t mask,
293                                   uint32_t inv)
294 {
295         amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
296         amdgpu_ring_write(ring,
297                           /* memory (1) or register (0) */
298                           (WAIT_REG_MEM_MEM_SPACE(mem_space) |
299                            WAIT_REG_MEM_OPERATION(opt) | /* wait */
300                            WAIT_REG_MEM_FUNCTION(3) |  /* equal */
301                            WAIT_REG_MEM_ENGINE(eng_sel)));
302
303         if (mem_space)
304                 BUG_ON(addr0 & 0x3); /* Dword align */
305         amdgpu_ring_write(ring, addr0);
306         amdgpu_ring_write(ring, addr1);
307         amdgpu_ring_write(ring, ref);
308         amdgpu_ring_write(ring, mask);
309         amdgpu_ring_write(ring, inv); /* poll interval */
310 }
311
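/*
 * Basic ring sanity test: seed SCRATCH_REG0 with 0xCAFEDEAD, submit a packet
 * that writes 0xDEADBEEF to it, then poll the register until the value
 * changes or adev->usec_timeout expires.
 */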
312 static int gfx_v11_0_ring_test_ring(struct amdgpu_ring *ring)
313 {
314         struct amdgpu_device *adev = ring->adev;
315         uint32_t scratch = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG0);
316         uint32_t tmp = 0;
317         unsigned i;
318         int r;
319
320         WREG32(scratch, 0xCAFEDEAD);
321         r = amdgpu_ring_alloc(ring, 5);
322         if (r) {
323                 DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
324                           ring->idx, r);
325                 return r;
326         }
327
328         if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ) {
329                 gfx_v11_0_ring_emit_wreg(ring, scratch, 0xDEADBEEF);
330         } else {
331                 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
332                 amdgpu_ring_write(ring, scratch -
333                                   PACKET3_SET_UCONFIG_REG_START);
334                 amdgpu_ring_write(ring, 0xDEADBEEF);
335         }
336         amdgpu_ring_commit(ring);
337
338         for (i = 0; i < adev->usec_timeout; i++) {
339                 tmp = RREG32(scratch);
340                 if (tmp == 0xDEADBEEF)
341                         break;
342                 if (amdgpu_emu_mode == 1)
343                         msleep(1);
344                 else
345                         udelay(1);
346         }
347
348         if (i >= adev->usec_timeout)
349                 r = -ETIMEDOUT;
350         return r;
351 }
352
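/*
 * Indirect buffer test: build a small IB that writes 0xDEADBEEF to a
 * writeback slot (or a MES context slot for MES queues), schedule it,
 * wait for the fence and verify the value landed in memory.
 */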
353 static int gfx_v11_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
354 {
355         struct amdgpu_device *adev = ring->adev;
356         struct amdgpu_ib ib;
357         struct dma_fence *f = NULL;
358         unsigned index;
359         uint64_t gpu_addr;
360         volatile uint32_t *cpu_ptr;
361         long r;
362
363         /* MES KIQ firmware doesn't support indirect buffers for now */
364         if (adev->enable_mes_kiq &&
365             ring->funcs->type == AMDGPU_RING_TYPE_KIQ)
366                 return 0;
367
368         memset(&ib, 0, sizeof(ib));
369
370         if (ring->is_mes_queue) {
371                 uint32_t padding, offset;
372
373                 offset = amdgpu_mes_ctx_get_offs(ring, AMDGPU_MES_CTX_IB_OFFS);
374                 padding = amdgpu_mes_ctx_get_offs(ring,
375                                                   AMDGPU_MES_CTX_PADDING_OFFS);
376
377                 ib.gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
378                 ib.ptr = amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset);
379
380                 gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, padding);
381                 cpu_ptr = amdgpu_mes_ctx_get_offs_cpu_addr(ring, padding);
382                 *cpu_ptr = cpu_to_le32(0xCAFEDEAD);
383         } else {
384                 r = amdgpu_device_wb_get(adev, &index);
385                 if (r)
386                         return r;
387
388                 gpu_addr = adev->wb.gpu_addr + (index * 4);
389                 adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
390                 cpu_ptr = &adev->wb.wb[index];
391
392                 r = amdgpu_ib_get(adev, NULL, 16, AMDGPU_IB_POOL_DIRECT, &ib);
393                 if (r) {
394                         DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
395                         goto err1;
396                 }
397         }
398
399         ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
400         ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
401         ib.ptr[2] = lower_32_bits(gpu_addr);
402         ib.ptr[3] = upper_32_bits(gpu_addr);
403         ib.ptr[4] = 0xDEADBEEF;
404         ib.length_dw = 5;
405
406         r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
407         if (r)
408                 goto err2;
409
410         r = dma_fence_wait_timeout(f, false, timeout);
411         if (r == 0) {
412                 r = -ETIMEDOUT;
413                 goto err2;
414         } else if (r < 0) {
415                 goto err2;
416         }
417
418         if (le32_to_cpu(*cpu_ptr) == 0xDEADBEEF)
419                 r = 0;
420         else
421                 r = -EINVAL;
422 err2:
423         if (!ring->is_mes_queue)
424                 amdgpu_ib_free(adev, &ib, NULL);
425         dma_fence_put(f);
426 err1:
427         if (!ring->is_mes_queue)
428                 amdgpu_device_wb_free(adev, index);
429         return r;
430 }
431
432 static void gfx_v11_0_free_microcode(struct amdgpu_device *adev)
433 {
434         release_firmware(adev->gfx.pfp_fw);
435         adev->gfx.pfp_fw = NULL;
436         release_firmware(adev->gfx.me_fw);
437         adev->gfx.me_fw = NULL;
438         release_firmware(adev->gfx.rlc_fw);
439         adev->gfx.rlc_fw = NULL;
440         release_firmware(adev->gfx.mec_fw);
441         adev->gfx.mec_fw = NULL;
442
443         kfree(adev->gfx.rlc.register_list_format);
444 }
445
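/*
 * Fetch and validate the PFP/ME/RLC/MEC firmware images for this GC version.
 * The PFP header version selects between the legacy CP microcode layout and
 * the RS64 layout (separate instruction and per-pipe stack images); the RLC
 * image is skipped under SR-IOV.
 */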
446 static int gfx_v11_0_init_microcode(struct amdgpu_device *adev)
447 {
448         char fw_name[40];
449         char ucode_prefix[30];
450         int err;
451         const struct rlc_firmware_header_v2_0 *rlc_hdr;
452         uint16_t version_major;
453         uint16_t version_minor;
454
455         DRM_DEBUG("\n");
456
457         amdgpu_ucode_ip_version_decode(adev, GC_HWIP, ucode_prefix, sizeof(ucode_prefix));
458
459         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", ucode_prefix);
460         err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
461         if (err)
462                 goto out;
463         err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
464         if (err)
465                 goto out;
466         /* check the pfp fw header version to decide whether to enable rs64 mode for gfx11 */
467         adev->gfx.rs64_enable = amdgpu_ucode_hdr_version(
468                                 (union amdgpu_firmware_header *)
469                                 adev->gfx.pfp_fw->data, 2, 0);
470         if (adev->gfx.rs64_enable) {
471                 dev_info(adev->dev, "CP RS64 enable\n");
472                 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_PFP);
473                 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_PFP_P0_STACK);
474                 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_PFP_P1_STACK);
475         } else {
476                 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_PFP);
477         }
478
479         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", ucode_prefix);
480         err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
481         if (err)
482                 goto out;
483         err = amdgpu_ucode_validate(adev->gfx.me_fw);
484         if (err)
485                 goto out;
486         if (adev->gfx.rs64_enable) {
487                 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_ME);
488                 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_ME_P0_STACK);
489                 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_ME_P1_STACK);
490         } else {
491                 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_ME);
492         }
493
494         if (!amdgpu_sriov_vf(adev)) {
495                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", ucode_prefix);
496                 err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
497                 if (err)
498                         goto out;
499                 err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
500                 if (err)
501                         goto out;
502                 rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
503                 version_major = le16_to_cpu(rlc_hdr->header.header_version_major);
504                 version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor);
505                 err = amdgpu_gfx_rlc_init_microcode(adev, version_major, version_minor);
506                 if (err)
507                         goto out;
508         }
509
510         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", ucode_prefix);
511         err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
512         if (err)
513                 goto out;
514         err = amdgpu_ucode_validate(adev->gfx.mec_fw);
515         if (err)
516                 goto out;
517         if (adev->gfx.rs64_enable) {
518                 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_MEC);
519                 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_MEC_P0_STACK);
520                 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_MEC_P1_STACK);
521                 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_MEC_P2_STACK);
522                 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_MEC_P3_STACK);
523         } else {
524                 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC1);
525                 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC1_JT);
526         }
527
528         /* only one MEC for gfx 11.0.0. */
529         adev->gfx.mec2_fw = NULL;
530
531 out:
532         if (err) {
533                 dev_err(adev->dev,
534                         "gfx11: Failed to init firmware \"%s\"\n",
535                         fw_name);
536                 release_firmware(adev->gfx.pfp_fw);
537                 adev->gfx.pfp_fw = NULL;
538                 release_firmware(adev->gfx.me_fw);
539                 adev->gfx.me_fw = NULL;
540                 release_firmware(adev->gfx.rlc_fw);
541                 adev->gfx.rlc_fw = NULL;
542                 release_firmware(adev->gfx.mec_fw);
543                 adev->gfx.mec_fw = NULL;
544         }
545
546         return err;
547 }
548
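/*
 * Load and validate the PSP TOC (table of contents) image and record its
 * version, size and start address for later RLC autoload parsing.
 */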
549 static int gfx_v11_0_init_toc_microcode(struct amdgpu_device *adev)
550 {
551         const struct psp_firmware_header_v1_0 *toc_hdr;
552         int err = 0;
553         char fw_name[40];
554         char ucode_prefix[30];
555
556         amdgpu_ucode_ip_version_decode(adev, GC_HWIP, ucode_prefix, sizeof(ucode_prefix));
557
558         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_toc.bin", ucode_prefix);
559         err = request_firmware(&adev->psp.toc_fw, fw_name, adev->dev);
560         if (err)
561                 goto out;
562
563         err = amdgpu_ucode_validate(adev->psp.toc_fw);
564         if (err)
565                 goto out;
566
567         toc_hdr = (const struct psp_firmware_header_v1_0 *)adev->psp.toc_fw->data;
568         adev->psp.toc.fw_version = le32_to_cpu(toc_hdr->header.ucode_version);
569         adev->psp.toc.feature_version = le32_to_cpu(toc_hdr->sos.fw_version);
570         adev->psp.toc.size_bytes = le32_to_cpu(toc_hdr->header.ucode_size_bytes);
571         adev->psp.toc.start_addr = (uint8_t *)toc_hdr +
572                                 le32_to_cpu(toc_hdr->header.ucode_array_offset_bytes);
573         return 0;
574 out:
575         dev_err(adev->dev, "Failed to load TOC microcode\n");
576         release_firmware(adev->psp.toc_fw);
577         adev->psp.toc_fw = NULL;
578         return err;
579 }
580
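/*
 * Compute the clear-state buffer size in dwords: preamble begin/end, context
 * control, one SET_CONTEXT_REG header per extent plus its registers, the
 * PA_SC_TILE_STEERING_OVERRIDE write and the final CLEAR_STATE packet.
 */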
581 static u32 gfx_v11_0_get_csb_size(struct amdgpu_device *adev)
582 {
583         u32 count = 0;
584         const struct cs_section_def *sect = NULL;
585         const struct cs_extent_def *ext = NULL;
586
587         /* begin clear state */
588         count += 2;
589         /* context control state */
590         count += 3;
591
592         for (sect = gfx11_cs_data; sect->section != NULL; ++sect) {
593                 for (ext = sect->section; ext->extent != NULL; ++ext) {
594                         if (sect->id == SECT_CONTEXT)
595                                 count += 2 + ext->reg_count;
596                         else
597                                 return 0;
598                 }
599         }
600
601         /* set PA_SC_TILE_STEERING_OVERRIDE */
602         count += 3;
603         /* end clear state */
604         count += 2;
605         /* clear state */
606         count += 2;
607
608         return count;
609 }
610
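/*
 * Fill the clear-state indirect buffer that the CP replays on CLEAR_STATE:
 * the context register defaults from the cs_data tables plus the current
 * PA_SC_TILE_STEERING_OVERRIDE value, bracketed by preamble packets.
 */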
611 static void gfx_v11_0_get_csb_buffer(struct amdgpu_device *adev,
612                                     volatile u32 *buffer)
613 {
614         u32 count = 0, i;
615         const struct cs_section_def *sect = NULL;
616         const struct cs_extent_def *ext = NULL;
617         int ctx_reg_offset;
618
619         if (adev->gfx.rlc.cs_data == NULL)
620                 return;
621         if (buffer == NULL)
622                 return;
623
624         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
625         buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
626
627         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
628         buffer[count++] = cpu_to_le32(0x80000000);
629         buffer[count++] = cpu_to_le32(0x80000000);
630
631         for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
632                 for (ext = sect->section; ext->extent != NULL; ++ext) {
633                         if (sect->id == SECT_CONTEXT) {
634                                 buffer[count++] =
635                                         cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
636                                 buffer[count++] = cpu_to_le32(ext->reg_index -
637                                                 PACKET3_SET_CONTEXT_REG_START);
638                                 for (i = 0; i < ext->reg_count; i++)
639                                         buffer[count++] = cpu_to_le32(ext->extent[i]);
640                         } else {
641                                 return;
642                         }
643                 }
644         }
645
646         ctx_reg_offset =
647                 SOC15_REG_OFFSET(GC, 0, regPA_SC_TILE_STEERING_OVERRIDE) - PACKET3_SET_CONTEXT_REG_START;
648         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 1));
649         buffer[count++] = cpu_to_le32(ctx_reg_offset);
650         buffer[count++] = cpu_to_le32(adev->gfx.config.pa_sc_tile_steering_override);
651
652         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
653         buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
654
655         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
656         buffer[count++] = cpu_to_le32(0);
657 }
658
659 static void gfx_v11_0_rlc_fini(struct amdgpu_device *adev)
660 {
661         /* clear state block */
662         amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj,
663                         &adev->gfx.rlc.clear_state_gpu_addr,
664                         (void **)&adev->gfx.rlc.cs_ptr);
665
666         /* jump table block */
667         amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
668                         &adev->gfx.rlc.cp_table_gpu_addr,
669                         (void **)&adev->gfx.rlc.cp_table_ptr);
670 }
671
672 static void gfx_v11_0_init_rlcg_reg_access_ctrl(struct amdgpu_device *adev)
673 {
674         struct amdgpu_rlcg_reg_access_ctrl *reg_access_ctrl;
675
676         reg_access_ctrl = &adev->gfx.rlc.reg_access_ctrl;
677         reg_access_ctrl->scratch_reg0 = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG0);
678         reg_access_ctrl->scratch_reg1 = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG1);
679         reg_access_ctrl->scratch_reg2 = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG2);
680         reg_access_ctrl->scratch_reg3 = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG3);
681         reg_access_ctrl->grbm_cntl = SOC15_REG_OFFSET(GC, 0, regGRBM_GFX_CNTL);
682         reg_access_ctrl->grbm_idx = SOC15_REG_OFFSET(GC, 0, regGRBM_GFX_INDEX);
683         reg_access_ctrl->spare_int = SOC15_REG_OFFSET(GC, 0, regRLC_SPARE_INT_0);
684         adev->gfx.rlc.rlcg_reg_access_supported = true;
685 }
686
687 static int gfx_v11_0_rlc_init(struct amdgpu_device *adev)
688 {
689         const struct cs_section_def *cs_data;
690         int r;
691
692         adev->gfx.rlc.cs_data = gfx11_cs_data;
693
694         cs_data = adev->gfx.rlc.cs_data;
695
696         if (cs_data) {
697                 /* init clear state block */
698                 r = amdgpu_gfx_rlc_init_csb(adev);
699                 if (r)
700                         return r;
701         }
702
703         /* init spm vmid with 0xf */
704         if (adev->gfx.rlc.funcs->update_spm_vmid)
705                 adev->gfx.rlc.funcs->update_spm_vmid(adev, 0xf);
706
707         return 0;
708 }
709
710 static void gfx_v11_0_mec_fini(struct amdgpu_device *adev)
711 {
712         amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
713         amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_obj, NULL, NULL);
714         amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_data_obj, NULL, NULL);
715 }
716
717 static int gfx_v11_0_me_init(struct amdgpu_device *adev)
718 {
719         int r;
720
721         bitmap_zero(adev->gfx.me.queue_bitmap, AMDGPU_MAX_GFX_QUEUES);
722
723         amdgpu_gfx_graphics_queue_acquire(adev);
724
725         r = gfx_v11_0_init_microcode(adev);
726         if (r)
727                 DRM_ERROR("Failed to load gfx firmware!\n");
728
729         return r;
730 }
731
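/*
 * Reserve and clear the per-queue EOP (hpd_eop) buffer in GTT,
 * GFX11_MEC_HPD_SIZE bytes for each acquired compute ring.
 */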
732 static int gfx_v11_0_mec_init(struct amdgpu_device *adev)
733 {
734         int r;
735         u32 *hpd;
736         size_t mec_hpd_size;
737
738         bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
739
740         /* take ownership of the relevant compute queues */
741         amdgpu_gfx_compute_queue_acquire(adev);
742         mec_hpd_size = adev->gfx.num_compute_rings * GFX11_MEC_HPD_SIZE;
743
744         if (mec_hpd_size) {
745                 r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
746                                               AMDGPU_GEM_DOMAIN_GTT,
747                                               &adev->gfx.mec.hpd_eop_obj,
748                                               &adev->gfx.mec.hpd_eop_gpu_addr,
749                                               (void **)&hpd);
750                 if (r) {
751                         dev_warn(adev->dev, "(%d) create HPD EOP bo failed\n", r);
752                         gfx_v11_0_mec_fini(adev);
753                         return r;
754                 }
755
756                 memset(hpd, 0, mec_hpd_size);
757
758                 amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
759                 amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
760         }
761
762         return 0;
763 }
764
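/*
 * Wave debug helpers: SQ_IND_INDEX/SQ_IND_DATA give indexed access to the
 * per-wave registers; wave_read_regs uses the auto-increment mode to dump a
 * consecutive range of registers (used for SGPR/VGPR reads below).
 */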
765 static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t wave, uint32_t address)
766 {
767         WREG32_SOC15(GC, 0, regSQ_IND_INDEX,
768                 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
769                 (address << SQ_IND_INDEX__INDEX__SHIFT));
770         return RREG32_SOC15(GC, 0, regSQ_IND_DATA);
771 }
772
773 static void wave_read_regs(struct amdgpu_device *adev, uint32_t wave,
774                            uint32_t thread, uint32_t regno,
775                            uint32_t num, uint32_t *out)
776 {
777         WREG32_SOC15(GC, 0, regSQ_IND_INDEX,
778                 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
779                 (regno << SQ_IND_INDEX__INDEX__SHIFT) |
780                 (thread << SQ_IND_INDEX__WORKITEM_ID__SHIFT) |
781                 (SQ_IND_INDEX__AUTO_INCR_MASK));
782         while (num--)
783                 *(out++) = RREG32_SOC15(GC, 0, regSQ_IND_DATA);
784 }
785
786 static void gfx_v11_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
787 {
788         /* in gfx11 the SIMD_ID is specified as part of the INSTANCE
789          * field when performing a select_se_sh, so it should be
790          * zero here */
791         WARN_ON(simd != 0);
792
793         /* type 2 wave data */
794         dst[(*no_fields)++] = 2;
795         dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_STATUS);
796         dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_PC_LO);
797         dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_PC_HI);
798         dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_EXEC_LO);
799         dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_EXEC_HI);
800         dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_HW_ID1);
801         dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_HW_ID2);
802         dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_GPR_ALLOC);
803         dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_LDS_ALLOC);
804         dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_TRAPSTS);
805         dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_IB_STS);
806         dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_IB_STS2);
807         dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_IB_DBG1);
808         dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_M0);
809         dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_MODE);
810 }
811
812 static void gfx_v11_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
813                                      uint32_t wave, uint32_t start,
814                                      uint32_t size, uint32_t *dst)
815 {
816         WARN_ON(simd != 0);
817
818         wave_read_regs(
819                 adev, wave, 0, start + SQIND_WAVE_SGPRS_OFFSET, size,
820                 dst);
821 }
822
823 static void gfx_v11_0_read_wave_vgprs(struct amdgpu_device *adev, uint32_t simd,
824                                       uint32_t wave, uint32_t thread,
825                                       uint32_t start, uint32_t size,
826                                       uint32_t *dst)
827 {
828         wave_read_regs(
829                 adev, wave, thread,
830                 start + SQIND_WAVE_VGPRS_OFFSET, size, dst);
831 }
832
833 static void gfx_v11_0_select_me_pipe_q(struct amdgpu_device *adev,
834                                                                           u32 me, u32 pipe, u32 q, u32 vm)
835 {
836         soc21_grbm_select(adev, me, pipe, q, vm);
837 }
838
839 static const struct amdgpu_gfx_funcs gfx_v11_0_gfx_funcs = {
840         .get_gpu_clock_counter = &gfx_v11_0_get_gpu_clock_counter,
841         .select_se_sh = &gfx_v11_0_select_se_sh,
842         .read_wave_data = &gfx_v11_0_read_wave_data,
843         .read_wave_sgprs = &gfx_v11_0_read_wave_sgprs,
844         .read_wave_vgprs = &gfx_v11_0_read_wave_vgprs,
845         .select_me_pipe_q = &gfx_v11_0_select_me_pipe_q,
846         .update_perfmon_mgcg = &gfx_v11_0_update_perf_clk,
847 };
848
849 static int gfx_v11_0_gpu_early_init(struct amdgpu_device *adev)
850 {
851
852         switch (adev->ip_versions[GC_HWIP][0]) {
853         case IP_VERSION(11, 0, 0):
854         case IP_VERSION(11, 0, 2):
855         case IP_VERSION(11, 0, 3):
856                 adev->gfx.config.max_hw_contexts = 8;
857                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
858                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
859                 adev->gfx.config.sc_hiz_tile_fifo_size = 0;
860                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
861                 break;
862         case IP_VERSION(11, 0, 1):
863         case IP_VERSION(11, 0, 4):
864                 adev->gfx.config.max_hw_contexts = 8;
865                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
866                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
867                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x80;
868                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x300;
869                 break;
870         default:
871                 BUG();
872                 break;
873         }
874
875         return 0;
876 }
877
878 static int gfx_v11_0_gfx_ring_init(struct amdgpu_device *adev, int ring_id,
879                                    int me, int pipe, int queue)
880 {
881         int r;
882         struct amdgpu_ring *ring;
883         unsigned int irq_type;
884
885         ring = &adev->gfx.gfx_ring[ring_id];
886
887         ring->me = me;
888         ring->pipe = pipe;
889         ring->queue = queue;
890
891         ring->ring_obj = NULL;
892         ring->use_doorbell = true;
893
894         if (!ring_id)
895                 ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1;
896         else
897                 ring->doorbell_index = adev->doorbell_index.gfx_ring1 << 1;
898         sprintf(ring->name, "gfx_%d.%d.%d", ring->me, ring->pipe, ring->queue);
899
900         irq_type = AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP + ring->pipe;
901         r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, irq_type,
902                              AMDGPU_RING_PRIO_DEFAULT, NULL);
903         if (r)
904                 return r;
905         return 0;
906 }
907
908 static int gfx_v11_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
909                                        int mec, int pipe, int queue)
910 {
911         int r;
912         unsigned irq_type;
913         struct amdgpu_ring *ring;
914         unsigned int hw_prio;
915
916         ring = &adev->gfx.compute_ring[ring_id];
917
918         /* mec0 is me1 */
919         ring->me = mec + 1;
920         ring->pipe = pipe;
921         ring->queue = queue;
922
923         ring->ring_obj = NULL;
924         ring->use_doorbell = true;
925         ring->doorbell_index = (adev->doorbell_index.mec_ring0 + ring_id) << 1;
926         ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
927                                 + (ring_id * GFX11_MEC_HPD_SIZE);
928         sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
929
930         irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
931                 + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
932                 + ring->pipe;
933         hw_prio = amdgpu_gfx_is_high_priority_compute_queue(adev, ring) ?
934                         AMDGPU_GFX_PIPE_PRIO_HIGH : AMDGPU_GFX_PIPE_PRIO_NORMAL;
935         /* type-2 packets are deprecated on MEC, use type-3 instead */
936         r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, irq_type,
937                              hw_prio, NULL);
938         if (r)
939                 return r;
940
941         return 0;
942 }
943
944 static struct {
945         SOC21_FIRMWARE_ID       id;
946         unsigned int            offset;
947         unsigned int            size;
948 } rlc_autoload_info[SOC21_FIRMWARE_ID_MAX];
949
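/*
 * Walk the RLC table-of-contents blob and cache each firmware entry's id,
 * byte offset and byte size (the TOC stores dword counts, hence the * 4).
 */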
950 static void gfx_v11_0_parse_rlc_toc(struct amdgpu_device *adev, void *rlc_toc)
951 {
952         RLC_TABLE_OF_CONTENT *ucode = rlc_toc;
953
954         while (ucode && (ucode->id > SOC21_FIRMWARE_ID_INVALID) &&
955                         (ucode->id < SOC21_FIRMWARE_ID_MAX)) {
956                 rlc_autoload_info[ucode->id].id = ucode->id;
957                 rlc_autoload_info[ucode->id].offset = ucode->offset * 4;
958                 rlc_autoload_info[ucode->id].size = ucode->size * 4;
959
960                 ucode++;
961         }
962 }
963
964 static uint32_t gfx_v11_0_calc_toc_total_size(struct amdgpu_device *adev)
965 {
966         uint32_t total_size = 0;
967         SOC21_FIRMWARE_ID id;
968
969         gfx_v11_0_parse_rlc_toc(adev, adev->psp.toc.start_addr);
970
971         for (id = SOC21_FIRMWARE_ID_RLC_G_UCODE; id < SOC21_FIRMWARE_ID_MAX; id++)
972                 total_size += rlc_autoload_info[id].size;
973
974         /* offsets in the rlc toc may be aligned/padded, so make sure the buffer reaches the end of the last entry */
975         if (total_size < rlc_autoload_info[SOC21_FIRMWARE_ID_MAX-1].offset)
976                 total_size = rlc_autoload_info[SOC21_FIRMWARE_ID_MAX-1].offset +
977                         rlc_autoload_info[SOC21_FIRMWARE_ID_MAX-1].size;
978
979         return total_size;
980 }
981
982 static int gfx_v11_0_rlc_autoload_buffer_init(struct amdgpu_device *adev)
983 {
984         int r;
985         uint32_t total_size;
986
987         total_size = gfx_v11_0_calc_toc_total_size(adev);
988
989         r = amdgpu_bo_create_reserved(adev, total_size, 64 * 1024,
990                         AMDGPU_GEM_DOMAIN_VRAM,
991                         &adev->gfx.rlc.rlc_autoload_bo,
992                         &adev->gfx.rlc.rlc_autoload_gpu_addr,
993                         (void **)&adev->gfx.rlc.rlc_autoload_ptr);
994
995         if (r) {
996                 dev_err(adev->dev, "(%d) failed to create fw autoload bo\n", r);
997                 return r;
998         }
999
1000         return 0;
1001 }
1002
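/*
 * Copy one firmware image into the RLC autoload bo at the offset recorded in
 * the TOC, zero-padding up to the TOC entry size, and set its bit in the
 * 64-bit autoload mask (the RS64 PFP/ME ids are not set in the mask here).
 */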
1003 static void gfx_v11_0_rlc_backdoor_autoload_copy_ucode(struct amdgpu_device *adev,
1004                                               SOC21_FIRMWARE_ID id,
1005                                               const void *fw_data,
1006                                               uint32_t fw_size,
1007                                               uint32_t *fw_autoload_mask)
1008 {
1009         uint32_t toc_offset;
1010         uint32_t toc_fw_size;
1011         char *ptr = adev->gfx.rlc.rlc_autoload_ptr;
1012
1013         if (id <= SOC21_FIRMWARE_ID_INVALID || id >= SOC21_FIRMWARE_ID_MAX)
1014                 return;
1015
1016         toc_offset = rlc_autoload_info[id].offset;
1017         toc_fw_size = rlc_autoload_info[id].size;
1018
1019         if (fw_size == 0)
1020                 fw_size = toc_fw_size;
1021
1022         if (fw_size > toc_fw_size)
1023                 fw_size = toc_fw_size;
1024
1025         memcpy(ptr + toc_offset, fw_data, fw_size);
1026
1027         if (fw_size < toc_fw_size)
1028                 memset(ptr + toc_offset + fw_size, 0, toc_fw_size - fw_size);
1029
1030         if ((id != SOC21_FIRMWARE_ID_RS64_PFP) && (id != SOC21_FIRMWARE_ID_RS64_ME))
1031                 *(uint64_t *)fw_autoload_mask |= 1ULL << id;
1032 }
1033
1034 static void gfx_v11_0_rlc_backdoor_autoload_copy_toc_ucode(struct amdgpu_device *adev,
1035                                                         uint32_t *fw_autoload_mask)
1036 {
1037         void *data;
1038         uint32_t size;
1039         uint64_t *toc_ptr;
1040
1041         *(uint64_t *)fw_autoload_mask |= 0x1;
1042
1043         DRM_DEBUG("rlc autoload enabled fw: 0x%llx\n", *(uint64_t *)fw_autoload_mask);
1044
1045         data = adev->psp.toc.start_addr;
1046         size = rlc_autoload_info[SOC21_FIRMWARE_ID_RLC_TOC].size;
1047
1048         toc_ptr = (uint64_t *)data + size / 8 - 1;
1049         *toc_ptr = *(uint64_t *)fw_autoload_mask;
1050
1051         gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RLC_TOC,
1052                                         data, size, fw_autoload_mask);
1053 }
1054
1055 static void gfx_v11_0_rlc_backdoor_autoload_copy_gfx_ucode(struct amdgpu_device *adev,
1056                                                         uint32_t *fw_autoload_mask)
1057 {
1058         const __le32 *fw_data;
1059         uint32_t fw_size;
1060         const struct gfx_firmware_header_v1_0 *cp_hdr;
1061         const struct gfx_firmware_header_v2_0 *cpv2_hdr;
1062         const struct rlc_firmware_header_v2_0 *rlc_hdr;
1063         const struct rlc_firmware_header_v2_2 *rlcv22_hdr;
1064         uint16_t version_major, version_minor;
1065
1066         if (adev->gfx.rs64_enable) {
1067                 /* pfp ucode */
1068                 cpv2_hdr = (const struct gfx_firmware_header_v2_0 *)
1069                         adev->gfx.pfp_fw->data;
1070                 /* instruction */
1071                 fw_data = (const __le32 *)(adev->gfx.pfp_fw->data +
1072                         le32_to_cpu(cpv2_hdr->ucode_offset_bytes));
1073                 fw_size = le32_to_cpu(cpv2_hdr->ucode_size_bytes);
1074                 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_PFP,
1075                                                 fw_data, fw_size, fw_autoload_mask);
1076                 /* data */
1077                 fw_data = (const __le32 *)(adev->gfx.pfp_fw->data +
1078                         le32_to_cpu(cpv2_hdr->data_offset_bytes));
1079                 fw_size = le32_to_cpu(cpv2_hdr->data_size_bytes);
1080                 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_PFP_P0_STACK,
1081                                                 fw_data, fw_size, fw_autoload_mask);
1082                 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_PFP_P1_STACK,
1083                                                 fw_data, fw_size, fw_autoload_mask);
1084                 /* me ucode */
1085                 cpv2_hdr = (const struct gfx_firmware_header_v2_0 *)
1086                         adev->gfx.me_fw->data;
1087                 /* instruction */
1088                 fw_data = (const __le32 *)(adev->gfx.me_fw->data +
1089                         le32_to_cpu(cpv2_hdr->ucode_offset_bytes));
1090                 fw_size = le32_to_cpu(cpv2_hdr->ucode_size_bytes);
1091                 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_ME,
1092                                                 fw_data, fw_size, fw_autoload_mask);
1093                 /* data */
1094                 fw_data = (const __le32 *)(adev->gfx.me_fw->data +
1095                         le32_to_cpu(cpv2_hdr->data_offset_bytes));
1096                 fw_size = le32_to_cpu(cpv2_hdr->data_size_bytes);
1097                 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_ME_P0_STACK,
1098                                                 fw_data, fw_size, fw_autoload_mask);
1099                 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_ME_P1_STACK,
1100                                                 fw_data, fw_size, fw_autoload_mask);
1101                 /* mec ucode */
1102                 cpv2_hdr = (const struct gfx_firmware_header_v2_0 *)
1103                         adev->gfx.mec_fw->data;
1104                 /* instruction */
1105                 fw_data = (const __le32 *) (adev->gfx.mec_fw->data +
1106                         le32_to_cpu(cpv2_hdr->ucode_offset_bytes));
1107                 fw_size = le32_to_cpu(cpv2_hdr->ucode_size_bytes);
1108                 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_MEC,
1109                                                 fw_data, fw_size, fw_autoload_mask);
1110                 /* data */
1111                 fw_data = (const __le32 *) (adev->gfx.mec_fw->data +
1112                         le32_to_cpu(cpv2_hdr->data_offset_bytes));
1113                 fw_size = le32_to_cpu(cpv2_hdr->data_size_bytes);
1114                 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_MEC_P0_STACK,
1115                                                 fw_data, fw_size, fw_autoload_mask);
1116                 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_MEC_P1_STACK,
1117                                                 fw_data, fw_size, fw_autoload_mask);
1118                 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_MEC_P2_STACK,
1119                                                 fw_data, fw_size, fw_autoload_mask);
1120                 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_MEC_P3_STACK,
1121                                                 fw_data, fw_size, fw_autoload_mask);
1122         } else {
1123                 /* pfp ucode */
1124                 cp_hdr = (const struct gfx_firmware_header_v1_0 *)
1125                         adev->gfx.pfp_fw->data;
1126                 fw_data = (const __le32 *)(adev->gfx.pfp_fw->data +
1127                                 le32_to_cpu(cp_hdr->header.ucode_array_offset_bytes));
1128                 fw_size = le32_to_cpu(cp_hdr->header.ucode_size_bytes);
1129                 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_CP_PFP,
1130                                                 fw_data, fw_size, fw_autoload_mask);
1131
1132                 /* me ucode */
1133                 cp_hdr = (const struct gfx_firmware_header_v1_0 *)
1134                         adev->gfx.me_fw->data;
1135                 fw_data = (const __le32 *)(adev->gfx.me_fw->data +
1136                                 le32_to_cpu(cp_hdr->header.ucode_array_offset_bytes));
1137                 fw_size = le32_to_cpu(cp_hdr->header.ucode_size_bytes);
1138                 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_CP_ME,
1139                                                 fw_data, fw_size, fw_autoload_mask);
1140
1141                 /* mec ucode */
1142                 cp_hdr = (const struct gfx_firmware_header_v1_0 *)
1143                         adev->gfx.mec_fw->data;
1144                 fw_data = (const __le32 *) (adev->gfx.mec_fw->data +
1145                                 le32_to_cpu(cp_hdr->header.ucode_array_offset_bytes));
1146                 fw_size = le32_to_cpu(cp_hdr->header.ucode_size_bytes) -
1147                         cp_hdr->jt_size * 4;
1148                 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_CP_MEC,
1149                                                 fw_data, fw_size, fw_autoload_mask);
1150         }
1151
1152         /* rlc ucode */
1153         rlc_hdr = (const struct rlc_firmware_header_v2_0 *)
1154                 adev->gfx.rlc_fw->data;
1155         fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
1156                         le32_to_cpu(rlc_hdr->header.ucode_array_offset_bytes));
1157         fw_size = le32_to_cpu(rlc_hdr->header.ucode_size_bytes);
1158         gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RLC_G_UCODE,
1159                                         fw_data, fw_size, fw_autoload_mask);
1160
1161         version_major = le16_to_cpu(rlc_hdr->header.header_version_major);
1162         version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor);
1163         if (version_major == 2) {
1164                 if (version_minor >= 2) {
1165                         rlcv22_hdr = (const struct rlc_firmware_header_v2_2 *)adev->gfx.rlc_fw->data;
1166
1167                         fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
1168                                         le32_to_cpu(rlcv22_hdr->rlc_iram_ucode_offset_bytes));
1169                         fw_size = le32_to_cpu(rlcv22_hdr->rlc_iram_ucode_size_bytes);
1170                         gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RLX6_UCODE,
1171                                         fw_data, fw_size, fw_autoload_mask);
1172
1173                         fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
1174                                         le32_to_cpu(rlcv22_hdr->rlc_dram_ucode_offset_bytes));
1175                         fw_size = le32_to_cpu(rlcv22_hdr->rlc_dram_ucode_size_bytes);
1176                         gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RLX6_DRAM_BOOT,
1177                                         fw_data, fw_size, fw_autoload_mask);
1178                 }
1179         }
1180 }
1181
1182 static void gfx_v11_0_rlc_backdoor_autoload_copy_sdma_ucode(struct amdgpu_device *adev,
1183                                                         uint32_t *fw_autoload_mask)
1184 {
1185         const __le32 *fw_data;
1186         uint32_t fw_size;
1187         const struct sdma_firmware_header_v2_0 *sdma_hdr;
1188
1189         sdma_hdr = (const struct sdma_firmware_header_v2_0 *)
1190                 adev->sdma.instance[0].fw->data;
1191         fw_data = (const __le32 *) (adev->sdma.instance[0].fw->data +
1192                         le32_to_cpu(sdma_hdr->header.ucode_array_offset_bytes));
1193         fw_size = le32_to_cpu(sdma_hdr->ctx_ucode_size_bytes);
1194
1195         gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev,
1196                         SOC21_FIRMWARE_ID_SDMA_UCODE_TH0, fw_data, fw_size, fw_autoload_mask);
1197
1198         fw_data = (const __le32 *) (adev->sdma.instance[0].fw->data +
1199                         le32_to_cpu(sdma_hdr->ctl_ucode_offset));
1200         fw_size = le32_to_cpu(sdma_hdr->ctl_ucode_size_bytes);
1201
1202         gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev,
1203                         SOC21_FIRMWARE_ID_SDMA_UCODE_TH1, fw_data, fw_size, fw_autoload_mask);
1204 }
1205
1206 static void gfx_v11_0_rlc_backdoor_autoload_copy_mes_ucode(struct amdgpu_device *adev,
1207                                                         uint32_t *fw_autoload_mask)
1208 {
1209         const __le32 *fw_data;
1210         unsigned fw_size;
1211         const struct mes_firmware_header_v1_0 *mes_hdr;
1212         int pipe, ucode_id, data_id;
1213
1214         for (pipe = 0; pipe < 2; pipe++) {
1215                 if (pipe == 0) {
1216                         ucode_id = SOC21_FIRMWARE_ID_RS64_MES_P0;
1217                         data_id  = SOC21_FIRMWARE_ID_RS64_MES_P0_STACK;
1218                 } else {
1219                         ucode_id = SOC21_FIRMWARE_ID_RS64_MES_P1;
1220                         data_id  = SOC21_FIRMWARE_ID_RS64_MES_P1_STACK;
1221                 }
1222
1223                 mes_hdr = (const struct mes_firmware_header_v1_0 *)
1224                         adev->mes.fw[pipe]->data;
1225
1226                 fw_data = (const __le32 *)(adev->mes.fw[pipe]->data +
1227                                 le32_to_cpu(mes_hdr->mes_ucode_offset_bytes));
1228                 fw_size = le32_to_cpu(mes_hdr->mes_ucode_size_bytes);
1229
1230                 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev,
1231                                 ucode_id, fw_data, fw_size, fw_autoload_mask);
1232
1233                 fw_data = (const __le32 *)(adev->mes.fw[pipe]->data +
1234                                 le32_to_cpu(mes_hdr->mes_ucode_data_offset_bytes));
1235                 fw_size = le32_to_cpu(mes_hdr->mes_ucode_data_size_bytes);
1236
1237                 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev,
1238                                 data_id, fw_data, fw_size, fw_autoload_mask);
1239         }
1240 }
1241
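/*
 * RLC backdoor autoload: stage the SDMA, GFX, MES and TOC ucode in the
 * autoload buffer, point the RLC bootloader at the staged RLC_G image,
 * then load, set up and start the IMU before disabling GPA mode.
 */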
1242 static int gfx_v11_0_rlc_backdoor_autoload_enable(struct amdgpu_device *adev)
1243 {
1244         uint32_t rlc_g_offset, rlc_g_size;
1245         uint64_t gpu_addr;
1246         uint32_t autoload_fw_id[2];
1247
1248         memset(autoload_fw_id, 0, sizeof(uint32_t) * 2);
1249
1250         /* RLC autoload sequence 2: copy ucode */
1251         gfx_v11_0_rlc_backdoor_autoload_copy_sdma_ucode(adev, autoload_fw_id);
1252         gfx_v11_0_rlc_backdoor_autoload_copy_gfx_ucode(adev, autoload_fw_id);
1253         gfx_v11_0_rlc_backdoor_autoload_copy_mes_ucode(adev, autoload_fw_id);
1254         gfx_v11_0_rlc_backdoor_autoload_copy_toc_ucode(adev, autoload_fw_id);
1255
1256         rlc_g_offset = rlc_autoload_info[SOC21_FIRMWARE_ID_RLC_G_UCODE].offset;
1257         rlc_g_size = rlc_autoload_info[SOC21_FIRMWARE_ID_RLC_G_UCODE].size;
1258         gpu_addr = adev->gfx.rlc.rlc_autoload_gpu_addr + rlc_g_offset;
1259
1260         WREG32_SOC15(GC, 0, regGFX_IMU_RLC_BOOTLOADER_ADDR_HI, upper_32_bits(gpu_addr));
1261         WREG32_SOC15(GC, 0, regGFX_IMU_RLC_BOOTLOADER_ADDR_LO, lower_32_bits(gpu_addr));
1262
1263         WREG32_SOC15(GC, 0, regGFX_IMU_RLC_BOOTLOADER_SIZE, rlc_g_size);
1264
1265         /* RLC autoload sequence 3: load IMU fw */
1266         if (adev->gfx.imu.funcs->load_microcode)
1267                 adev->gfx.imu.funcs->load_microcode(adev);
1268         /* RLC autoload sequence 4: init IMU fw */
1269         if (adev->gfx.imu.funcs->setup_imu)
1270                 adev->gfx.imu.funcs->setup_imu(adev);
1271         if (adev->gfx.imu.funcs->start_imu)
1272                 adev->gfx.imu.funcs->start_imu(adev);
1273
1274         /* RLC autoload sequence 5: disable gpa mode */
1275         gfx_v11_0_disable_gpa_mode(adev);
1276
1277         return 0;
1278 }
1279
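/*
 * Software init: pick the ME/MEC topology for the GC IP version, register
 * the CP interrupt sources, initialize the PFP/ME/RLC/MEC BOs, create the
 * gfx and compute rings (plus KIQ when MES KIQ is not used) and, for
 * backdoor autoload, the TOC microcode and autoload buffer.
 */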
1280 static int gfx_v11_0_sw_init(void *handle)
1281 {
1282         int i, j, k, r, ring_id = 0;
1283         struct amdgpu_kiq *kiq;
1284         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1285
1286         adev->gfxhub.funcs->init(adev);
1287
1288         switch (adev->ip_versions[GC_HWIP][0]) {
1289         case IP_VERSION(11, 0, 0):
1290         case IP_VERSION(11, 0, 1):
1291         case IP_VERSION(11, 0, 2):
1292         case IP_VERSION(11, 0, 3):
1293         case IP_VERSION(11, 0, 4):
1294                 adev->gfx.me.num_me = 1;
1295                 adev->gfx.me.num_pipe_per_me = 1;
1296                 adev->gfx.me.num_queue_per_pipe = 1;
1297                 adev->gfx.mec.num_mec = 2;
1298                 adev->gfx.mec.num_pipe_per_mec = 4;
1299                 adev->gfx.mec.num_queue_per_pipe = 4;
1300                 break;
1301         default:
1302                 adev->gfx.me.num_me = 1;
1303                 adev->gfx.me.num_pipe_per_me = 1;
1304                 adev->gfx.me.num_queue_per_pipe = 1;
1305                 adev->gfx.mec.num_mec = 1;
1306                 adev->gfx.mec.num_pipe_per_mec = 4;
1307                 adev->gfx.mec.num_queue_per_pipe = 8;
1308                 break;
1309         }
1310
1311         /* EOP Event */
1312         r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GRBM_CP,
1313                               GFX_11_0_0__SRCID__CP_EOP_INTERRUPT,
1314                               &adev->gfx.eop_irq);
1315         if (r)
1316                 return r;
1317
1318         /* Privileged reg */
1319         r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GRBM_CP,
1320                               GFX_11_0_0__SRCID__CP_PRIV_REG_FAULT,
1321                               &adev->gfx.priv_reg_irq);
1322         if (r)
1323                 return r;
1324
1325         /* Privileged inst */
1326         r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GRBM_CP,
1327                               GFX_11_0_0__SRCID__CP_PRIV_INSTR_FAULT,
1328                               &adev->gfx.priv_inst_irq);
1329         if (r)
1330                 return r;
1331
1332         adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
1333
1334         if (adev->gfx.imu.funcs) {
1335                 if (adev->gfx.imu.funcs->init_microcode) {
1336                         r = adev->gfx.imu.funcs->init_microcode(adev);
1337                         if (r)
1338                                 DRM_ERROR("Failed to load imu firmware!\n");
1339                 }
1340         }
1341
1342         r = gfx_v11_0_me_init(adev);
1343         if (r)
1344                 return r;
1345
1346         r = gfx_v11_0_rlc_init(adev);
1347         if (r) {
1348                 DRM_ERROR("Failed to init rlc BOs!\n");
1349                 return r;
1350         }
1351
1352         r = gfx_v11_0_mec_init(adev);
1353         if (r) {
1354                 DRM_ERROR("Failed to init MEC BOs!\n");
1355                 return r;
1356         }
1357
1358         /* set up the gfx ring */
1359         for (i = 0; i < adev->gfx.me.num_me; i++) {
1360                 for (j = 0; j < adev->gfx.me.num_queue_per_pipe; j++) {
1361                         for (k = 0; k < adev->gfx.me.num_pipe_per_me; k++) {
1362                                 if (!amdgpu_gfx_is_me_queue_enabled(adev, i, k, j))
1363                                         continue;
1364
1365                                 r = gfx_v11_0_gfx_ring_init(adev, ring_id,
1366                                                             i, k, j);
1367                                 if (r)
1368                                         return r;
1369                                 ring_id++;
1370                         }
1371                 }
1372         }
1373
1374         ring_id = 0;
1375         /* set up the compute queues - allocate horizontally across pipes */
1376         for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
1377                 for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
1378                         for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
1379                                 if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k,
1380                                                                      j))
1381                                         continue;
1382
1383                                 r = gfx_v11_0_compute_ring_init(adev, ring_id,
1384                                                                 i, k, j);
1385                                 if (r)
1386                                         return r;
1387
1388                                 ring_id++;
1389                         }
1390                 }
1391         }
1392
1393         if (!adev->enable_mes_kiq) {
1394                 r = amdgpu_gfx_kiq_init(adev, GFX11_MEC_HPD_SIZE);
1395                 if (r) {
1396                         DRM_ERROR("Failed to init KIQ BOs!\n");
1397                         return r;
1398                 }
1399
1400                 kiq = &adev->gfx.kiq;
1401                 r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
1402                 if (r)
1403                         return r;
1404         }
1405
1406         r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct v11_compute_mqd));
1407         if (r)
1408                 return r;
1409
1410         /* allocate visible FB for rlc auto-loading fw */
1411         if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) {
1412                 r = gfx_v11_0_init_toc_microcode(adev);
1413                 if (r)
1414                         dev_err(adev->dev, "Failed to load toc firmware!\n");
1415                 r = gfx_v11_0_rlc_autoload_buffer_init(adev);
1416                 if (r)
1417                         return r;
1418         }
1419
1420         r = gfx_v11_0_gpu_early_init(adev);
1421         if (r)
1422                 return r;
1423
1424         return 0;
1425 }
1426
1427 static void gfx_v11_0_pfp_fini(struct amdgpu_device *adev)
1428 {
1429         amdgpu_bo_free_kernel(&adev->gfx.pfp.pfp_fw_obj,
1430                               &adev->gfx.pfp.pfp_fw_gpu_addr,
1431                               (void **)&adev->gfx.pfp.pfp_fw_ptr);
1432
1433         amdgpu_bo_free_kernel(&adev->gfx.pfp.pfp_fw_data_obj,
1434                               &adev->gfx.pfp.pfp_fw_data_gpu_addr,
1435                               (void **)&adev->gfx.pfp.pfp_fw_data_ptr);
1436 }
1437
1438 static void gfx_v11_0_me_fini(struct amdgpu_device *adev)
1439 {
1440         amdgpu_bo_free_kernel(&adev->gfx.me.me_fw_obj,
1441                               &adev->gfx.me.me_fw_gpu_addr,
1442                               (void **)&adev->gfx.me.me_fw_ptr);
1443
1444         amdgpu_bo_free_kernel(&adev->gfx.me.me_fw_data_obj,
1445                                &adev->gfx.me.me_fw_data_gpu_addr,
1446                                (void **)&adev->gfx.me.me_fw_data_ptr);
1447 }
1448
1449 static void gfx_v11_0_rlc_autoload_buffer_fini(struct amdgpu_device *adev)
1450 {
1451         amdgpu_bo_free_kernel(&adev->gfx.rlc.rlc_autoload_bo,
1452                         &adev->gfx.rlc.rlc_autoload_gpu_addr,
1453                         (void **)&adev->gfx.rlc.rlc_autoload_ptr);
1454 }
1455
1456 static int gfx_v11_0_sw_fini(void *handle)
1457 {
1458         int i;
1459         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1460
1461         for (i = 0; i < adev->gfx.num_gfx_rings; i++)
1462                 amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
1463         for (i = 0; i < adev->gfx.num_compute_rings; i++)
1464                 amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
1465
1466         amdgpu_gfx_mqd_sw_fini(adev);
1467
1468         if (!adev->enable_mes_kiq) {
1469                 amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring);
1470                 amdgpu_gfx_kiq_fini(adev);
1471         }
1472
1473         gfx_v11_0_pfp_fini(adev);
1474         gfx_v11_0_me_fini(adev);
1475         gfx_v11_0_rlc_fini(adev);
1476         gfx_v11_0_mec_fini(adev);
1477
1478         if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO)
1479                 gfx_v11_0_rlc_autoload_buffer_fini(adev);
1480
1481         gfx_v11_0_free_microcode(adev);
1482
1483         return 0;
1484 }
1485
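/*
 * Program GRBM_GFX_INDEX to target a specific SE/SA/instance, or to
 * broadcast when 0xffffffff is passed for the corresponding argument.
 */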
1486 static void gfx_v11_0_select_se_sh(struct amdgpu_device *adev, u32 se_num,
1487                                    u32 sh_num, u32 instance)
1488 {
1489         u32 data;
1490
1491         if (instance == 0xffffffff)
1492                 data = REG_SET_FIELD(0, GRBM_GFX_INDEX,
1493                                      INSTANCE_BROADCAST_WRITES, 1);
1494         else
1495                 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX,
1496                                      instance);
1497
1498         if (se_num == 0xffffffff)
1499                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES,
1500                                      1);
1501         else
1502                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
1503
1504         if (sh_num == 0xffffffff)
1505                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SA_BROADCAST_WRITES,
1506                                      1);
1507         else
1508                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SA_INDEX, sh_num);
1509
1510         WREG32_SOC15(GC, 0, regGRBM_GFX_INDEX, data);
1511 }
1512
1513 static u32 gfx_v11_0_get_rb_active_bitmap(struct amdgpu_device *adev)
1514 {
1515         u32 data, mask;
1516
1517         data = RREG32_SOC15(GC, 0, regCC_RB_BACKEND_DISABLE);
1518         data |= RREG32_SOC15(GC, 0, regGC_USER_RB_BACKEND_DISABLE);
1519
1520         data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK;
1521         data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT;
1522
1523         mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se /
1524                                          adev->gfx.config.max_sh_per_se);
1525
1526         return (~data) & mask;
1527 }
1528
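/* Collect the active render-backend bitmap across all SEs and SAs. */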
1529 static void gfx_v11_0_setup_rb(struct amdgpu_device *adev)
1530 {
1531         int i, j;
1532         u32 data;
1533         u32 active_rbs = 0;
1534         u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
1535                                         adev->gfx.config.max_sh_per_se;
1536
1537         mutex_lock(&adev->grbm_idx_mutex);
1538         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
1539                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
1540                         gfx_v11_0_select_se_sh(adev, i, j, 0xffffffff);
1541                         data = gfx_v11_0_get_rb_active_bitmap(adev);
1542                         active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
1543                                                rb_bitmap_width_per_sh);
1544                 }
1545         }
1546         gfx_v11_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1547         mutex_unlock(&adev->grbm_idx_mutex);
1548
1549         adev->gfx.config.backend_enable_mask = active_rbs;
1550         adev->gfx.config.num_rbs = hweight32(active_rbs);
1551 }
1552
1553 #define DEFAULT_SH_MEM_BASES    (0x6000)
1554 #define LDS_APP_BASE           0x1
1555 #define SCRATCH_APP_BASE       0x2
1556
1557 static void gfx_v11_0_init_compute_vmid(struct amdgpu_device *adev)
1558 {
1559         int i;
1560         uint32_t sh_mem_bases;
1561         uint32_t data;
1562
1563         /*
1564          * Configure apertures:
1565          * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
1566          * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
1567          * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
1568          */
1569         sh_mem_bases = (LDS_APP_BASE << SH_MEM_BASES__SHARED_BASE__SHIFT) |
1570                         SCRATCH_APP_BASE;
1571
1572         mutex_lock(&adev->srbm_mutex);
1573         for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
1574                 soc21_grbm_select(adev, 0, 0, 0, i);
1575                 /* CP and shaders */
1576                 WREG32_SOC15(GC, 0, regSH_MEM_CONFIG, DEFAULT_SH_MEM_CONFIG);
1577                 WREG32_SOC15(GC, 0, regSH_MEM_BASES, sh_mem_bases);
1578
1579                 /* Enable trap for each kfd vmid. */
1580                 data = RREG32_SOC15(GC, 0, regSPI_GDBG_PER_VMID_CNTL);
1581                 data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, TRAP_EN, 1);
                     WREG32_SOC15(GC, 0, regSPI_GDBG_PER_VMID_CNTL, data);
1582         }
1583         soc21_grbm_select(adev, 0, 0, 0, 0);
1584         mutex_unlock(&adev->srbm_mutex);
1585
1586         /* Initialize all compute VMIDs to have no GDS, GWS, or OA
1587          * access. These should be enabled by FW for target VMIDs. */
1588         for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
1589                 WREG32_SOC15_OFFSET(GC, 0, regGDS_VMID0_BASE, 2 * i, 0);
1590                 WREG32_SOC15_OFFSET(GC, 0, regGDS_VMID0_SIZE, 2 * i, 0);
1591                 WREG32_SOC15_OFFSET(GC, 0, regGDS_GWS_VMID0, i, 0);
1592                 WREG32_SOC15_OFFSET(GC, 0, regGDS_OA_VMID0, i, 0);
1593         }
1594 }
1595
1596 static void gfx_v11_0_init_gds_vmid(struct amdgpu_device *adev)
1597 {
1598         int vmid;
1599
1600         /*
1601          * Initialize all compute and user-gfx VMIDs to have no GDS, GWS, or OA
1602          * access. Compute VMIDs should be enabled by FW for target VMIDs,
1603          * the driver can enable them for graphics. VMID0 should maintain
1604          * access so that HWS firmware can save/restore entries.
1605          */
1606         for (vmid = 1; vmid < 16; vmid++) {
1607                 WREG32_SOC15_OFFSET(GC, 0, regGDS_VMID0_BASE, 2 * vmid, 0);
1608                 WREG32_SOC15_OFFSET(GC, 0, regGDS_VMID0_SIZE, 2 * vmid, 0);
1609                 WREG32_SOC15_OFFSET(GC, 0, regGDS_GWS_VMID0, vmid, 0);
1610                 WREG32_SOC15_OFFSET(GC, 0, regGDS_OA_VMID0, vmid, 0);
1611         }
1612 }
1613
1614 static void gfx_v11_0_tcp_harvest(struct amdgpu_device *adev)
1615 {
1616         /* TODO: harvest feature to be added later. */
1617 }
1618
1619 static void gfx_v11_0_get_tcc_info(struct amdgpu_device *adev)
1620 {
1621         /* TCCs are global (not instanced). */
1622         uint32_t tcc_disable = RREG32_SOC15(GC, 0, regCGTS_TCC_DISABLE) |
1623                                RREG32_SOC15(GC, 0, regCGTS_USER_TCC_DISABLE);
1624
1625         adev->gfx.config.tcc_disabled_mask =
1626                 REG_GET_FIELD(tcc_disable, CGTS_TCC_DISABLE, TCC_DISABLE) |
1627                 (REG_GET_FIELD(tcc_disable, CGTS_TCC_DISABLE, HI_TCC_DISABLE) << 16);
1628 }
1629
1630 static void gfx_v11_0_constants_init(struct amdgpu_device *adev)
1631 {
1632         u32 tmp;
1633         int i;
1634
1635         if (!amdgpu_sriov_vf(adev))
1636                 WREG32_FIELD15_PREREG(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff);
1637
1638         gfx_v11_0_setup_rb(adev);
1639         gfx_v11_0_get_cu_info(adev, &adev->gfx.cu_info);
1640         gfx_v11_0_get_tcc_info(adev);
1641         adev->gfx.config.pa_sc_tile_steering_override = 0;
1642
1643         /* XXX SH_MEM regs */
1644         /* where to put LDS, scratch, GPUVM in FSA64 space */
1645         mutex_lock(&adev->srbm_mutex);
1646         for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB_0].num_ids; i++) {
1647                 soc21_grbm_select(adev, 0, 0, 0, i);
1648                 /* CP and shaders */
1649                 WREG32_SOC15(GC, 0, regSH_MEM_CONFIG, DEFAULT_SH_MEM_CONFIG);
1650                 if (i != 0) {
1651                         tmp = REG_SET_FIELD(0, SH_MEM_BASES, PRIVATE_BASE,
1652                                 (adev->gmc.private_aperture_start >> 48));
1653                         tmp = REG_SET_FIELD(tmp, SH_MEM_BASES, SHARED_BASE,
1654                                 (adev->gmc.shared_aperture_start >> 48));
1655                         WREG32_SOC15(GC, 0, regSH_MEM_BASES, tmp);
1656                 }
1657         }
1658         soc21_grbm_select(adev, 0, 0, 0, 0);
1659
1660         mutex_unlock(&adev->srbm_mutex);
1661
1662         gfx_v11_0_init_compute_vmid(adev);
1663         gfx_v11_0_init_gds_vmid(adev);
1664 }
1665
1666 static void gfx_v11_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
1667                                                bool enable)
1668 {
1669         u32 tmp;
1670
1671         if (amdgpu_sriov_vf(adev))
1672                 return;
1673
1674         tmp = RREG32_SOC15(GC, 0, regCP_INT_CNTL_RING0);
1675
1676         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE,
1677                             enable ? 1 : 0);
1678         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE,
1679                             enable ? 1 : 0);
1680         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE,
1681                             enable ? 1 : 0);
1682         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE,
1683                             enable ? 1 : 0);
1684
1685         WREG32_SOC15(GC, 0, regCP_INT_CNTL_RING0, tmp);
1686 }
1687
1688 static int gfx_v11_0_init_csb(struct amdgpu_device *adev)
1689 {
1690         adev->gfx.rlc.funcs->get_csb_buffer(adev, adev->gfx.rlc.cs_ptr);
1691
1692         WREG32_SOC15(GC, 0, regRLC_CSIB_ADDR_HI,
1693                         adev->gfx.rlc.clear_state_gpu_addr >> 32);
1694         WREG32_SOC15(GC, 0, regRLC_CSIB_ADDR_LO,
1695                         adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
1696         WREG32_SOC15(GC, 0, regRLC_CSIB_LENGTH, adev->gfx.rlc.clear_state_size);
1697
1698         return 0;
1699 }
1700
1701 static void gfx_v11_0_rlc_stop(struct amdgpu_device *adev)
1702 {
1703         u32 tmp = RREG32_SOC15(GC, 0, regRLC_CNTL);
1704
1705         tmp = REG_SET_FIELD(tmp, RLC_CNTL, RLC_ENABLE_F32, 0);
1706         WREG32_SOC15(GC, 0, regRLC_CNTL, tmp);
1707 }
1708
1709 static void gfx_v11_0_rlc_reset(struct amdgpu_device *adev)
1710 {
1711         WREG32_FIELD15_PREREG(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
1712         udelay(50);
1713         WREG32_FIELD15_PREREG(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
1714         udelay(50);
1715 }
1716
1717 static void gfx_v11_0_rlc_smu_handshake_cntl(struct amdgpu_device *adev,
1718                                              bool enable)
1719 {
1720         uint32_t rlc_pg_cntl;
1721
1722         rlc_pg_cntl = RREG32_SOC15(GC, 0, regRLC_PG_CNTL);
1723
1724         if (!enable) {
1725                 /* RLC_PG_CNTL[23] = 0 (default)
1726                  * RLC will wait for handshake acks with SMU
1727                  * GFXOFF will be enabled
1728                  * RLC_PG_CNTL[23] = 1
1729                  * RLC will not issue any message to SMU
1730                  * hence no handshake between SMU & RLC
1731                  * GFXOFF will be disabled
1732                  */
1733                 rlc_pg_cntl |= RLC_PG_CNTL__SMU_HANDSHAKE_DISABLE_MASK;
1734         } else
1735                 rlc_pg_cntl &= ~RLC_PG_CNTL__SMU_HANDSHAKE_DISABLE_MASK;
1736         WREG32_SOC15(GC, 0, regRLC_PG_CNTL, rlc_pg_cntl);
1737 }
1738
1739 static void gfx_v11_0_rlc_start(struct amdgpu_device *adev)
1740 {
1741         /* TODO: enable the RLC & SMU handshake once the SMU
1742          * and GFXOFF features work as expected */
1743         if (!(amdgpu_pp_feature_mask & PP_GFXOFF_MASK))
1744                 gfx_v11_0_rlc_smu_handshake_cntl(adev, false);
1745
1746         WREG32_FIELD15_PREREG(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 1);
1747         udelay(50);
1748 }
1749
1750 static void gfx_v11_0_rlc_enable_srm(struct amdgpu_device *adev)
1751 {
1752         uint32_t tmp;
1753
1754         /* enable Save Restore Machine */
1755         tmp = RREG32(SOC15_REG_OFFSET(GC, 0, regRLC_SRM_CNTL));
1756         tmp |= RLC_SRM_CNTL__AUTO_INCR_ADDR_MASK;
1757         tmp |= RLC_SRM_CNTL__SRM_ENABLE_MASK;
1758         WREG32(SOC15_REG_OFFSET(GC, 0, regRLC_SRM_CNTL), tmp);
1759 }
1760
1761 static void gfx_v11_0_load_rlcg_microcode(struct amdgpu_device *adev)
1762 {
1763         const struct rlc_firmware_header_v2_0 *hdr;
1764         const __le32 *fw_data;
1765         unsigned i, fw_size;
1766
1767         hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
1768         fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
1769                            le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1770         fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
1771
1772         WREG32_SOC15(GC, 0, regRLC_GPM_UCODE_ADDR,
1773                      RLCG_UCODE_LOADING_START_ADDRESS);
1774
1775         for (i = 0; i < fw_size; i++)
1776                 WREG32_SOC15(GC, 0, regRLC_GPM_UCODE_DATA,
1777                              le32_to_cpup(fw_data++));
1778
1779         WREG32_SOC15(GC, 0, regRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);
1780 }
1781
1782 static void gfx_v11_0_load_rlc_iram_dram_microcode(struct amdgpu_device *adev)
1783 {
1784         const struct rlc_firmware_header_v2_2 *hdr;
1785         const __le32 *fw_data;
1786         unsigned i, fw_size;
1787         u32 tmp;
1788
1789         hdr = (const struct rlc_firmware_header_v2_2 *)adev->gfx.rlc_fw->data;
1790
1791         fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
1792                         le32_to_cpu(hdr->rlc_iram_ucode_offset_bytes));
1793         fw_size = le32_to_cpu(hdr->rlc_iram_ucode_size_bytes) / 4;
1794
1795         WREG32_SOC15(GC, 0, regRLC_LX6_IRAM_ADDR, 0);
1796
1797         for (i = 0; i < fw_size; i++) {
1798                 if ((amdgpu_emu_mode == 1) && (i % 100 == 99))
1799                         msleep(1);
1800                 WREG32_SOC15(GC, 0, regRLC_LX6_IRAM_DATA,
1801                                 le32_to_cpup(fw_data++));
1802         }
1803
1804         WREG32_SOC15(GC, 0, regRLC_LX6_IRAM_ADDR, adev->gfx.rlc_fw_version);
1805
1806         fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
1807                         le32_to_cpu(hdr->rlc_dram_ucode_offset_bytes));
1808         fw_size = le32_to_cpu(hdr->rlc_dram_ucode_size_bytes) / 4;
1809
1810         WREG32_SOC15(GC, 0, regRLC_LX6_DRAM_ADDR, 0);
1811         for (i = 0; i < fw_size; i++) {
1812                 if ((amdgpu_emu_mode == 1) && (i % 100 == 99))
1813                         msleep(1);
1814                 WREG32_SOC15(GC, 0, regRLC_LX6_DRAM_DATA,
1815                                 le32_to_cpup(fw_data++));
1816         }
1817
1818         WREG32_SOC15(GC, 0, regRLC_LX6_IRAM_ADDR, adev->gfx.rlc_fw_version);
1819
1820         tmp = RREG32_SOC15(GC, 0, regRLC_LX6_CNTL);
1821         tmp = REG_SET_FIELD(tmp, RLC_LX6_CNTL, PDEBUG_ENABLE, 1);
1822         tmp = REG_SET_FIELD(tmp, RLC_LX6_CNTL, BRESET, 0);
1823         WREG32_SOC15(GC, 0, regRLC_LX6_CNTL, tmp);
1824 }
1825
1826 static void gfx_v11_0_load_rlcp_rlcv_microcode(struct amdgpu_device *adev)
1827 {
1828         const struct rlc_firmware_header_v2_3 *hdr;
1829         const __le32 *fw_data;
1830         unsigned i, fw_size;
1831         u32 tmp;
1832
1833         hdr = (const struct rlc_firmware_header_v2_3 *)adev->gfx.rlc_fw->data;
1834
1835         fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
1836                         le32_to_cpu(hdr->rlcp_ucode_offset_bytes));
1837         fw_size = le32_to_cpu(hdr->rlcp_ucode_size_bytes) / 4;
1838
1839         WREG32_SOC15(GC, 0, regRLC_PACE_UCODE_ADDR, 0);
1840
1841         for (i = 0; i < fw_size; i++) {
1842                 if ((amdgpu_emu_mode == 1) && (i % 100 == 99))
1843                         msleep(1);
1844                 WREG32_SOC15(GC, 0, regRLC_PACE_UCODE_DATA,
1845                                 le32_to_cpup(fw_data++));
1846         }
1847
1848         WREG32_SOC15(GC, 0, regRLC_PACE_UCODE_ADDR, adev->gfx.rlc_fw_version);
1849
1850         tmp = RREG32_SOC15(GC, 0, regRLC_GPM_THREAD_ENABLE);
1851         tmp = REG_SET_FIELD(tmp, RLC_GPM_THREAD_ENABLE, THREAD1_ENABLE, 1);
1852         WREG32_SOC15(GC, 0, regRLC_GPM_THREAD_ENABLE, tmp);
1853
1854         fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
1855                         le32_to_cpu(hdr->rlcv_ucode_offset_bytes));
1856         fw_size = le32_to_cpu(hdr->rlcv_ucode_size_bytes) / 4;
1857
1858         WREG32_SOC15(GC, 0, regRLC_GPU_IOV_UCODE_ADDR, 0);
1859
1860         for (i = 0; i < fw_size; i++) {
1861                 if ((amdgpu_emu_mode == 1) && (i % 100 == 99))
1862                         msleep(1);
1863                 WREG32_SOC15(GC, 0, regRLC_GPU_IOV_UCODE_DATA,
1864                                 le32_to_cpup(fw_data++));
1865         }
1866
1867         WREG32_SOC15(GC, 0, regRLC_GPU_IOV_UCODE_ADDR, adev->gfx.rlc_fw_version);
1868
1869         tmp = RREG32_SOC15(GC, 0, regRLC_GPU_IOV_F32_CNTL);
1870         tmp = REG_SET_FIELD(tmp, RLC_GPU_IOV_F32_CNTL, ENABLE, 1);
1871         WREG32_SOC15(GC, 0, regRLC_GPU_IOV_F32_CNTL, tmp);
1872 }
1873
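/*
 * Direct (legacy) RLC load: requires a v2.x RLC header. The RLCG image is
 * always loaded; with amdgpu_dpm enabled the IRAM/DRAM images (v2.2+) and
 * the RLCP/RLCV images (v2.3) are loaded as well.
 */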
1874 static int gfx_v11_0_rlc_load_microcode(struct amdgpu_device *adev)
1875 {
1876         const struct rlc_firmware_header_v2_0 *hdr;
1877         uint16_t version_major;
1878         uint16_t version_minor;
1879
1880         if (!adev->gfx.rlc_fw)
1881                 return -EINVAL;
1882
1883         hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
1884         amdgpu_ucode_print_rlc_hdr(&hdr->header);
1885
1886         version_major = le16_to_cpu(hdr->header.header_version_major);
1887         version_minor = le16_to_cpu(hdr->header.header_version_minor);
1888
1889         if (version_major == 2) {
1890                 gfx_v11_0_load_rlcg_microcode(adev);
1891                 if (amdgpu_dpm == 1) {
1892                         if (version_minor >= 2)
1893                                 gfx_v11_0_load_rlc_iram_dram_microcode(adev);
1894                         if (version_minor == 3)
1895                                 gfx_v11_0_load_rlcp_rlcv_microcode(adev);
1896                 }
1897
1898                 return 0;
1899         }
1900
1901         return -EINVAL;
1902 }
1903
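/*
 * Resume the RLC: with PSP front-door loading only the CSB (and SRM on
 * bare metal) needs programming; otherwise stop the RLC, disable CG/PG,
 * load the RLC ucode directly if required, program the CSB and restart.
 */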
1904 static int gfx_v11_0_rlc_resume(struct amdgpu_device *adev)
1905 {
1906         int r;
1907
1908         if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
1909                 gfx_v11_0_init_csb(adev);
1910
1911                 if (!amdgpu_sriov_vf(adev)) /* enable RLC SRM */
1912                         gfx_v11_0_rlc_enable_srm(adev);
1913         } else {
1914                 if (amdgpu_sriov_vf(adev)) {
1915                         gfx_v11_0_init_csb(adev);
1916                         return 0;
1917                 }
1918
1919                 adev->gfx.rlc.funcs->stop(adev);
1920
1921                 /* disable CG */
1922                 WREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL, 0);
1923
1924                 /* disable PG */
1925                 WREG32_SOC15(GC, 0, regRLC_PG_CNTL, 0);
1926
1927                 if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
1928                         /* legacy rlc firmware loading */
1929                         r = gfx_v11_0_rlc_load_microcode(adev);
1930                         if (r)
1931                                 return r;
1932                 }
1933
1934                 gfx_v11_0_init_csb(adev);
1935
1936                 adev->gfx.rlc.funcs->start(adev);
1937         }
1938         return 0;
1939 }
1940
1941 static int gfx_v11_0_config_me_cache(struct amdgpu_device *adev, uint64_t addr)
1942 {
1943         uint32_t usec_timeout = 50000;  /* wait for 50ms */
1944         uint32_t tmp;
1945         int i;
1946
1947         /* Trigger an invalidation of the L1 instruction caches */
1948         tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL);
1949         tmp = REG_SET_FIELD(tmp, CP_ME_IC_OP_CNTL, INVALIDATE_CACHE, 1);
1950         WREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL, tmp);
1951
1952         /* Wait for invalidation complete */
1953         for (i = 0; i < usec_timeout; i++) {
1954                 tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL);
1955                 if (1 == REG_GET_FIELD(tmp, CP_ME_IC_OP_CNTL,
1956                                         INVALIDATE_CACHE_COMPLETE))
1957                         break;
1958                 udelay(1);
1959         }
1960
1961         if (i >= usec_timeout) {
1962                 dev_err(adev->dev, "failed to invalidate instruction cache\n");
1963                 return -EINVAL;
1964         }
1965
1966         if (amdgpu_emu_mode == 1)
1967                 adev->hdp.funcs->flush_hdp(adev, NULL);
1968
1969         tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_BASE_CNTL);
1970         tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, VMID, 0);
1971         tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, CACHE_POLICY, 0);
1972         tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, EXE_DISABLE, 0);
1973         tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, ADDRESS_CLAMP, 1);
1974         WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_CNTL, tmp);
1975
1976         /* Program me ucode address into instruction cache address register */
1977         WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_LO,
1978                         lower_32_bits(addr) & 0xFFFFF000);
1979         WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_HI,
1980                         upper_32_bits(addr));
1981
1982         return 0;
1983 }
1984
1985 static int gfx_v11_0_config_pfp_cache(struct amdgpu_device *adev, uint64_t addr)
1986 {
1987         uint32_t usec_timeout = 50000;  /* wait for 50ms */
1988         uint32_t tmp;
1989         int i;
1990
1991         /* Trigger an invalidation of the L1 instruction caches */
1992         tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL);
1993         tmp = REG_SET_FIELD(tmp, CP_PFP_IC_OP_CNTL, INVALIDATE_CACHE, 1);
1994         WREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL, tmp);
1995
1996         /* Wait for invalidation complete */
1997         for (i = 0; i < usec_timeout; i++) {
1998                 tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL);
1999                 if (1 == REG_GET_FIELD(tmp, CP_PFP_IC_OP_CNTL,
2000                                         INVALIDATE_CACHE_COMPLETE))
2001                         break;
2002                 udelay(1);
2003         }
2004
2005         if (i >= usec_timeout) {
2006                 dev_err(adev->dev, "failed to invalidate instruction cache\n");
2007                 return -EINVAL;
2008         }
2009
2010         if (amdgpu_emu_mode == 1)
2011                 adev->hdp.funcs->flush_hdp(adev, NULL);
2012
2013         tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_CNTL);
2014         tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, VMID, 0);
2015         tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, CACHE_POLICY, 0);
2016         tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, EXE_DISABLE, 0);
2017         tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, ADDRESS_CLAMP, 1);
2018         WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_CNTL, tmp);
2019
2020         /* Program pfp ucode address into instruction cache address register */
2021         WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_LO,
2022                         lower_32_bits(addr) & 0xFFFFF000);
2023         WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_HI,
2024                         upper_32_bits(addr));
2025
2026         return 0;
2027 }
2028
2029 static int gfx_v11_0_config_mec_cache(struct amdgpu_device *adev, uint64_t addr)
2030 {
2031         uint32_t usec_timeout = 50000;  /* wait for 50ms */
2032         uint32_t tmp;
2033         int i;
2034
2035         /* Trigger an invalidation of the L1 instruction caches */
2036         tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL);
2037         tmp = REG_SET_FIELD(tmp, CP_CPC_IC_OP_CNTL, INVALIDATE_CACHE, 1);
2038
2039         WREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL, tmp);
2040
2041         /* Wait for invalidation complete */
2042         for (i = 0; i < usec_timeout; i++) {
2043                 tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL);
2044                 if (1 == REG_GET_FIELD(tmp, CP_CPC_IC_OP_CNTL,
2045                                         INVALIDATE_CACHE_COMPLETE))
2046                         break;
2047                 udelay(1);
2048         }
2049
2050         if (i >= usec_timeout) {
2051                 dev_err(adev->dev, "failed to invalidate instruction cache\n");
2052                 return -EINVAL;
2053         }
2054
2055         if (amdgpu_emu_mode == 1)
2056                 adev->hdp.funcs->flush_hdp(adev, NULL);
2057
2058         tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_CNTL);
2059         tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0);
2060         tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, EXE_DISABLE, 0);
2061         tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, ADDRESS_CLAMP, 1);
2062         WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_CNTL, tmp);
2063
2064         /* Program mec1 ucode address into instruction cache address register */
2065         WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_LO,
2066                         lower_32_bits(addr) & 0xFFFFF000);
2067         WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_HI,
2068                         upper_32_bits(addr));
2069
2070         return 0;
2071 }
2072
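/*
 * RS64 PFP setup: program the instruction-cache base, wait for the implied
 * invalidation, prime the cache, set each pipe's program-counter start
 * address and data-cache base, then invalidate the RS64 data caches.
 */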
2073 static int gfx_v11_0_config_pfp_cache_rs64(struct amdgpu_device *adev, uint64_t addr, uint64_t addr2)
2074 {
2075         uint32_t usec_timeout = 50000;  /* wait for 50ms */
2076         uint32_t tmp;
2077         unsigned i, pipe_id;
2078         const struct gfx_firmware_header_v2_0 *pfp_hdr;
2079
2080         pfp_hdr = (const struct gfx_firmware_header_v2_0 *)
2081                 adev->gfx.pfp_fw->data;
2082
2083         WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_LO,
2084                 lower_32_bits(addr));
2085         WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_HI,
2086                 upper_32_bits(addr));
2087
2088         tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_CNTL);
2089         tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, VMID, 0);
2090         tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, CACHE_POLICY, 0);
2091         tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, EXE_DISABLE, 0);
2092         WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_CNTL, tmp);
2093
2094         /*
2095          * Programming any of the CP_PFP_IC_BASE registers
2096          * forces invalidation of the PFP L1 I$. Wait for the
2097          * invalidation to complete.
2098          */
2099         for (i = 0; i < usec_timeout; i++) {
2100                 tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL);
2101                 if (1 == REG_GET_FIELD(tmp, CP_PFP_IC_OP_CNTL,
2102                         INVALIDATE_CACHE_COMPLETE))
2103                         break;
2104                 udelay(1);
2105         }
2106
2107         if (i >= usec_timeout) {
2108                 dev_err(adev->dev, "failed to invalidate instruction cache\n");
2109                 return -EINVAL;
2110         }
2111
2112         /* Prime the L1 instruction caches */
2113         tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL);
2114         tmp = REG_SET_FIELD(tmp, CP_PFP_IC_OP_CNTL, PRIME_ICACHE, 1);
2115         WREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL, tmp);
2116         /* Wait for the cache to be primed */
2117         for (i = 0; i < usec_timeout; i++) {
2118                 tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL);
2119                 if (1 == REG_GET_FIELD(tmp, CP_PFP_IC_OP_CNTL,
2120                         ICACHE_PRIMED))
2121                         break;
2122                 udelay(1);
2123         }
2124
2125         if (i >= usec_timeout) {
2126                 dev_err(adev->dev, "failed to prime instruction cache\n");
2127                 return -EINVAL;
2128         }
2129
2130         mutex_lock(&adev->srbm_mutex);
2131         for (pipe_id = 0; pipe_id < adev->gfx.me.num_pipe_per_me; pipe_id++) {
2132                 soc21_grbm_select(adev, 0, pipe_id, 0, 0);
2133                 WREG32_SOC15(GC, 0, regCP_PFP_PRGRM_CNTR_START,
2134                         (pfp_hdr->ucode_start_addr_hi << 30) |
2135                         (pfp_hdr->ucode_start_addr_lo >> 2));
2136                 WREG32_SOC15(GC, 0, regCP_PFP_PRGRM_CNTR_START_HI,
2137                         pfp_hdr->ucode_start_addr_hi >> 2);
2138
2139                 /*
2140                  * Program CP_ME_CNTL to reset given PIPE to take
2141                  * effect of CP_PFP_PRGRM_CNTR_START.
2142                  */
2143                 tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL);
2144                 if (pipe_id == 0)
2145                         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
2146                                         PFP_PIPE0_RESET, 1);
2147                 else
2148                         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
2149                                         PFP_PIPE1_RESET, 1);
2150                 WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
2151
2152                 /* Clear pfp pipe reset bit. */
2153                 if (pipe_id == 0)
2154                         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
2155                                         PFP_PIPE0_RESET, 0);
2156                 else
2157                         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
2158                                         PFP_PIPE1_RESET, 0);
2159                 WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
2160
2161                 WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE0_LO,
2162                         lower_32_bits(addr2));
2163                 WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE0_HI,
2164                         upper_32_bits(addr2));
2165         }
2166         soc21_grbm_select(adev, 0, 0, 0, 0);
2167         mutex_unlock(&adev->srbm_mutex);
2168
2169         tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL);
2170         tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, VMID, 0);
2171         tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, CACHE_POLICY, 0);
2172         WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL, tmp);
2173
2174         /* Invalidate the data caches */
2175         tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL);
2176         tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL, INVALIDATE_DCACHE, 1);
2177         WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL, tmp);
2178
2179         for (i = 0; i < usec_timeout; i++) {
2180                 tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL);
2181                 if (1 == REG_GET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL,
2182                         INVALIDATE_DCACHE_COMPLETE))
2183                         break;
2184                 udelay(1);
2185         }
2186
2187         if (i >= usec_timeout) {
2188                 dev_err(adev->dev, "failed to invalidate RS64 data cache\n");
2189                 return -EINVAL;
2190         }
2191
2192         return 0;
2193 }
2194
2195 static int gfx_v11_0_config_me_cache_rs64(struct amdgpu_device *adev, uint64_t addr, uint64_t addr2)
2196 {
2197         uint32_t usec_timeout = 50000;  /* wait for 50ms */
2198         uint32_t tmp;
2199         unsigned i, pipe_id;
2200         const struct gfx_firmware_header_v2_0 *me_hdr;
2201
2202         me_hdr = (const struct gfx_firmware_header_v2_0 *)
2203                 adev->gfx.me_fw->data;
2204
2205         WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_LO,
2206                 lower_32_bits(addr));
2207         WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_HI,
2208                 upper_32_bits(addr));
2209
2210         tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_BASE_CNTL);
2211         tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, VMID, 0);
2212         tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, CACHE_POLICY, 0);
2213         tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, EXE_DISABLE, 0);
2214         WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_CNTL, tmp);
2215
2216         /*
2217          * Programming any of the CP_ME_IC_BASE registers
2218          * forces invalidation of the ME L1 I$. Wait for the
2219          * invalidation to complete.
2220          */
2221         for (i = 0; i < usec_timeout; i++) {
2222                 tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL);
2223                 if (1 == REG_GET_FIELD(tmp, CP_ME_IC_OP_CNTL,
2224                         INVALIDATE_CACHE_COMPLETE))
2225                         break;
2226                 udelay(1);
2227         }
2228
2229         if (i >= usec_timeout) {
2230                 dev_err(adev->dev, "failed to invalidate instruction cache\n");
2231                 return -EINVAL;
2232         }
2233
2234         /* Prime the instruction caches */
2235         tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL);
2236         tmp = REG_SET_FIELD(tmp, CP_ME_IC_OP_CNTL, PRIME_ICACHE, 1);
2237         WREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL, tmp);
2238
2239         /* Wait for the instruction cache to be primed */
2240         for (i = 0; i < usec_timeout; i++) {
2241                 tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL);
2242                 if (1 == REG_GET_FIELD(tmp, CP_ME_IC_OP_CNTL,
2243                         ICACHE_PRIMED))
2244                         break;
2245                 udelay(1);
2246         }
2247
2248         if (i >= usec_timeout) {
2249                 dev_err(adev->dev, "failed to prime instruction cache\n");
2250                 return -EINVAL;
2251         }
2252
2253         mutex_lock(&adev->srbm_mutex);
2254         for (pipe_id = 0; pipe_id < adev->gfx.me.num_pipe_per_me; pipe_id++) {
2255                 soc21_grbm_select(adev, 0, pipe_id, 0, 0);
2256                 WREG32_SOC15(GC, 0, regCP_ME_PRGRM_CNTR_START,
2257                         (me_hdr->ucode_start_addr_hi << 30) |
2258                         (me_hdr->ucode_start_addr_lo >> 2) );
2259                 WREG32_SOC15(GC, 0, regCP_ME_PRGRM_CNTR_START_HI,
2260                         me_hdr->ucode_start_addr_hi>>2);
2261
2262                 /*
2263                  * Program CP_ME_CNTL to reset given PIPE to take
2264                  * effect of CP_ME_PRGRM_CNTR_START.
2265                  */
2266                 tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL);
2267                 if (pipe_id == 0)
2268                         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
2269                                         ME_PIPE0_RESET, 1);
2270                 else
2271                         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
2272                                         ME_PIPE1_RESET, 1);
2273                 WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
2274
2275                 /* Clear me pipe reset bit. */
2276                 if (pipe_id == 0)
2277                         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
2278                                         ME_PIPE0_RESET, 0);
2279                 else
2280                         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
2281                                         ME_PIPE1_RESET, 0);
2282                 WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
2283
2284                 WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE1_LO,
2285                         lower_32_bits(addr2));
2286                 WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE1_HI,
2287                         upper_32_bits(addr2));
2288         }
2289         soc21_grbm_select(adev, 0, 0, 0, 0);
2290         mutex_unlock(&adev->srbm_mutex);
2291
2292         tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL);
2293         tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, VMID, 0);
2294         tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, CACHE_POLICY, 0);
2295         WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL, tmp);
2296
2297         /* Invalidate the data caches */
2298         tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL);
2299         tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL, INVALIDATE_DCACHE, 1);
2300         WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL, tmp);
2301
2302         for (i = 0; i < usec_timeout; i++) {
2303                 tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL);
2304                 if (1 == REG_GET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL,
2305                         INVALIDATE_DCACHE_COMPLETE))
2306                         break;
2307                 udelay(1);
2308         }
2309
2310         if (i >= usec_timeout) {
2311                 dev_err(adev->dev, "failed to invalidate RS64 data cache\n");
2312                 return -EINVAL;
2313         }
2314
2315         return 0;
2316 }
2317
2318 static int gfx_v11_0_config_mec_cache_rs64(struct amdgpu_device *adev, uint64_t addr, uint64_t addr2)
2319 {
2320         uint32_t usec_timeout = 50000;  /* wait for 50ms */
2321         uint32_t tmp;
2322         unsigned i;
2323         const struct gfx_firmware_header_v2_0 *mec_hdr;
2324
2325         mec_hdr = (const struct gfx_firmware_header_v2_0 *)
2326                 adev->gfx.mec_fw->data;
2327
2328         tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_CNTL);
2329         tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, VMID, 0);
2330         tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, EXE_DISABLE, 0);
2331         tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0);
2332         WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_CNTL, tmp);
2333
2334         tmp = RREG32_SOC15(GC, 0, regCP_MEC_DC_BASE_CNTL);
2335         tmp = REG_SET_FIELD(tmp, CP_MEC_DC_BASE_CNTL, VMID, 0);
2336         tmp = REG_SET_FIELD(tmp, CP_MEC_DC_BASE_CNTL, CACHE_POLICY, 0);
2337         WREG32_SOC15(GC, 0, regCP_MEC_DC_BASE_CNTL, tmp);
2338
2339         mutex_lock(&adev->srbm_mutex);
2340         for (i = 0; i < adev->gfx.mec.num_pipe_per_mec; i++) {
2341                 soc21_grbm_select(adev, 1, i, 0, 0);
2342
2343                 WREG32_SOC15(GC, 0, regCP_MEC_MDBASE_LO, addr2);
2344                 WREG32_SOC15(GC, 0, regCP_MEC_MDBASE_HI,
2345                      upper_32_bits(addr2));
2346
2347                 WREG32_SOC15(GC, 0, regCP_MEC_RS64_PRGRM_CNTR_START,
2348                                         mec_hdr->ucode_start_addr_lo >> 2 |
2349                                         mec_hdr->ucode_start_addr_hi << 30);
2350                 WREG32_SOC15(GC, 0, regCP_MEC_RS64_PRGRM_CNTR_START_HI,
2351                                         mec_hdr->ucode_start_addr_hi >> 2);
2352
2353                 WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_LO, addr);
2354                 WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_HI,
2355                      upper_32_bits(addr));
2356         }
2357         mutex_unlock(&adev->srbm_mutex);
2358         soc21_grbm_select(adev, 0, 0, 0, 0);
2359
2360         /* Trigger an invalidation of the MEC data caches */
2361         tmp = RREG32_SOC15(GC, 0, regCP_MEC_DC_OP_CNTL);
2362         tmp = REG_SET_FIELD(tmp, CP_MEC_DC_OP_CNTL, INVALIDATE_DCACHE, 1);
2363         WREG32_SOC15(GC, 0, regCP_MEC_DC_OP_CNTL, tmp);
2364
2365         /* Wait for invalidation complete */
2366         for (i = 0; i < usec_timeout; i++) {
2367                 tmp = RREG32_SOC15(GC, 0, regCP_MEC_DC_OP_CNTL);
2368                 if (1 == REG_GET_FIELD(tmp, CP_MEC_DC_OP_CNTL,
2369                                        INVALIDATE_DCACHE_COMPLETE))
2370                         break;
2371                 udelay(1);
2372         }
2373
2374         if (i >= usec_timeout) {
2375                 dev_err(adev->dev, "failed to invalidate instruction cache\n");
2376                 return -EINVAL;
2377         }
2378
2379         /* Trigger an invalidation of the L1 instruction caches */
2380         tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL);
2381         tmp = REG_SET_FIELD(tmp, CP_CPC_IC_OP_CNTL, INVALIDATE_CACHE, 1);
2382         WREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL, tmp);
2383
2384         /* Wait for invalidation complete */
2385         for (i = 0; i < usec_timeout; i++) {
2386                 tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL);
2387                 if (1 == REG_GET_FIELD(tmp, CP_CPC_IC_OP_CNTL,
2388                                        INVALIDATE_CACHE_COMPLETE))
2389                         break;
2390                 udelay(1);
2391         }
2392
2393         if (i >= usec_timeout) {
2394                 dev_err(adev->dev, "failed to invalidate instruction cache\n");
2395                 return -EINVAL;
2396         }
2397
2398         return 0;
2399 }
2400
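/*
 * Program the RS64 PFP/ME/MEC program-counter start addresses for every
 * pipe and pulse the corresponding pipe reset bits so the new start
 * addresses take effect.
 */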
2401 static void gfx_v11_0_config_gfx_rs64(struct amdgpu_device *adev)
2402 {
2403         const struct gfx_firmware_header_v2_0 *pfp_hdr;
2404         const struct gfx_firmware_header_v2_0 *me_hdr;
2405         const struct gfx_firmware_header_v2_0 *mec_hdr;
2406         uint32_t pipe_id, tmp;
2407
2408         mec_hdr = (const struct gfx_firmware_header_v2_0 *)
2409                 adev->gfx.mec_fw->data;
2410         me_hdr = (const struct gfx_firmware_header_v2_0 *)
2411                 adev->gfx.me_fw->data;
2412         pfp_hdr = (const struct gfx_firmware_header_v2_0 *)
2413                 adev->gfx.pfp_fw->data;
2414
2415         /* config pfp program start addr */
2416         for (pipe_id = 0; pipe_id < 2; pipe_id++) {
2417                 soc21_grbm_select(adev, 0, pipe_id, 0, 0);
2418                 WREG32_SOC15(GC, 0, regCP_PFP_PRGRM_CNTR_START,
2419                         (pfp_hdr->ucode_start_addr_hi << 30) |
2420                         (pfp_hdr->ucode_start_addr_lo >> 2));
2421                 WREG32_SOC15(GC, 0, regCP_PFP_PRGRM_CNTR_START_HI,
2422                         pfp_hdr->ucode_start_addr_hi >> 2);
2423         }
2424         soc21_grbm_select(adev, 0, 0, 0, 0);
2425
2426         /* reset pfp pipe */
2427         tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL);
2428         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_PIPE0_RESET, 1);
2429         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_PIPE1_RESET, 1);
2430         WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
2431
2432         /* clear pfp pipe reset */
2433         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_PIPE0_RESET, 0);
2434         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_PIPE1_RESET, 0);
2435         WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
2436
2437         /* config me program start addr */
2438         for (pipe_id = 0; pipe_id < 2; pipe_id++) {
2439                 soc21_grbm_select(adev, 0, pipe_id, 0, 0);
2440                 WREG32_SOC15(GC, 0, regCP_ME_PRGRM_CNTR_START,
2441                         (me_hdr->ucode_start_addr_hi << 30) |
2442                         (me_hdr->ucode_start_addr_lo >> 2) );
2443                 WREG32_SOC15(GC, 0, regCP_ME_PRGRM_CNTR_START_HI,
2444                         me_hdr->ucode_start_addr_hi>>2);
2445         }
2446         soc21_grbm_select(adev, 0, 0, 0, 0);
2447
2448         /* reset me pipe */
2449         tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL);
2450         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_PIPE0_RESET, 1);
2451         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_PIPE1_RESET, 1);
2452         WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
2453
2454         /* clear me pipe reset */
2455         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_PIPE0_RESET, 0);
2456         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_PIPE1_RESET, 0);
2457         WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
2458
2459         /* config mec program start addr */
2460         for (pipe_id = 0; pipe_id < 4; pipe_id++) {
2461                 soc21_grbm_select(adev, 1, pipe_id, 0, 0);
2462                 WREG32_SOC15(GC, 0, regCP_MEC_RS64_PRGRM_CNTR_START,
2463                                         mec_hdr->ucode_start_addr_lo >> 2 |
2464                                         mec_hdr->ucode_start_addr_hi << 30);
2465                 WREG32_SOC15(GC, 0, regCP_MEC_RS64_PRGRM_CNTR_START_HI,
2466                                         mec_hdr->ucode_start_addr_hi >> 2);
2467         }
2468         soc21_grbm_select(adev, 0, 0, 0, 0);
2469
2470         /* reset mec pipe */
2471         tmp = RREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL);
2472         tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE0_RESET, 1);
2473         tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE1_RESET, 1);
2474         tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE2_RESET, 1);
2475         tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE3_RESET, 1);
2476         WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, tmp);
2477
2478         /* clear mec pipe reset */
2479         tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE0_RESET, 0);
2480         tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE1_RESET, 0);
2481         tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE2_RESET, 0);
2482         tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE3_RESET, 0);
2483         WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, tmp);
2484 }
2485
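/*
 * Poll CP_STAT and the RLC bootload status until the RLC reports that the
 * GC ucode autoload completed; for backdoor autoload, then point the CP
 * ME/PFP/MEC instruction (and, with RS64, data) caches at the staged images.
 */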
2486 static int gfx_v11_0_wait_for_rlc_autoload_complete(struct amdgpu_device *adev)
2487 {
2488         uint32_t cp_status;
2489         uint32_t bootload_status;
2490         int i, r;
2491         uint64_t addr, addr2;
2492
2493         for (i = 0; i < adev->usec_timeout; i++) {
2494                 cp_status = RREG32_SOC15(GC, 0, regCP_STAT);
2495
2496                 if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(11, 0, 1) ||
2497                                 adev->ip_versions[GC_HWIP][0] == IP_VERSION(11, 0, 4))
2498                         bootload_status = RREG32_SOC15(GC, 0,
2499                                         regRLC_RLCS_BOOTLOAD_STATUS_gc_11_0_1);
2500                 else
2501                         bootload_status = RREG32_SOC15(GC, 0, regRLC_RLCS_BOOTLOAD_STATUS);
2502
2503                 if ((cp_status == 0) &&
2504                     (REG_GET_FIELD(bootload_status,
2505                         RLC_RLCS_BOOTLOAD_STATUS, BOOTLOAD_COMPLETE) == 1)) {
2506                         break;
2507                 }
2508                 udelay(1);
2509         }
2510
2511         if (i >= adev->usec_timeout) {
2512                 dev_err(adev->dev, "rlc autoload: gc ucode autoload timeout\n");
2513                 return -ETIMEDOUT;
2514         }
2515
2516         if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) {
2517                 if (adev->gfx.rs64_enable) {
2518                         addr = adev->gfx.rlc.rlc_autoload_gpu_addr +
2519                                 rlc_autoload_info[SOC21_FIRMWARE_ID_RS64_ME].offset;
2520                         addr2 = adev->gfx.rlc.rlc_autoload_gpu_addr +
2521                                 rlc_autoload_info[SOC21_FIRMWARE_ID_RS64_ME_P0_STACK].offset;
2522                         r = gfx_v11_0_config_me_cache_rs64(adev, addr, addr2);
2523                         if (r)
2524                                 return r;
2525                         addr = adev->gfx.rlc.rlc_autoload_gpu_addr +
2526                                 rlc_autoload_info[SOC21_FIRMWARE_ID_RS64_PFP].offset;
2527                         addr2 = adev->gfx.rlc.rlc_autoload_gpu_addr +
2528                                 rlc_autoload_info[SOC21_FIRMWARE_ID_RS64_PFP_P0_STACK].offset;
2529                         r = gfx_v11_0_config_pfp_cache_rs64(adev, addr, addr2);
2530                         if (r)
2531                                 return r;
2532                         addr = adev->gfx.rlc.rlc_autoload_gpu_addr +
2533                                 rlc_autoload_info[SOC21_FIRMWARE_ID_RS64_MEC].offset;
2534                         addr2 = adev->gfx.rlc.rlc_autoload_gpu_addr +
2535                                 rlc_autoload_info[SOC21_FIRMWARE_ID_RS64_MEC_P0_STACK].offset;
2536                         r = gfx_v11_0_config_mec_cache_rs64(adev, addr, addr2);
2537                         if (r)
2538                                 return r;
2539                 } else {
2540                         addr = adev->gfx.rlc.rlc_autoload_gpu_addr +
2541                                 rlc_autoload_info[SOC21_FIRMWARE_ID_CP_ME].offset;
2542                         r = gfx_v11_0_config_me_cache(adev, addr);
2543                         if (r)
2544                                 return r;
2545                         addr = adev->gfx.rlc.rlc_autoload_gpu_addr +
2546                                 rlc_autoload_info[SOC21_FIRMWARE_ID_CP_PFP].offset;
2547                         r = gfx_v11_0_config_pfp_cache(adev, addr);
2548                         if (r)
2549                                 return r;
2550                         addr = adev->gfx.rlc.rlc_autoload_gpu_addr +
2551                                 rlc_autoload_info[SOC21_FIRMWARE_ID_CP_MEC].offset;
2552                         r = gfx_v11_0_config_mec_cache(adev, addr);
2553                         if (r)
2554                                 return r;
2555                 }
2556         }
2557
2558         return 0;
2559 }
2560
2561 static int gfx_v11_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
2562 {
2563         int i;
2564         u32 tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL);
2565
2566         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, enable ? 0 : 1);
2567         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, enable ? 0 : 1);
2568         WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
2569
2570         for (i = 0; i < adev->usec_timeout; i++) {
2571                 if (RREG32_SOC15(GC, 0, regCP_STAT) == 0)
2572                         break;
2573                 udelay(1);
2574         }
2575
2576         if (i >= adev->usec_timeout)
2577                 DRM_ERROR("failed to %s cp gfx\n", enable ? "unhalt" : "halt");
2578
2579         return 0;
2580 }
2581
2582 static int gfx_v11_0_cp_gfx_load_pfp_microcode(struct amdgpu_device *adev)
2583 {
2584         int r;
2585         const struct gfx_firmware_header_v1_0 *pfp_hdr;
2586         const __le32 *fw_data;
2587         unsigned i, fw_size;
2588
2589         pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
2590                 adev->gfx.pfp_fw->data;
2591
2592         amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
2593
2594         fw_data = (const __le32 *)(adev->gfx.pfp_fw->data +
2595                 le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
2596         fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes);
2597
2598         r = amdgpu_bo_create_reserved(adev, pfp_hdr->header.ucode_size_bytes,
2599                                       PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
2600                                       &adev->gfx.pfp.pfp_fw_obj,
2601                                       &adev->gfx.pfp.pfp_fw_gpu_addr,
2602                                       (void **)&adev->gfx.pfp.pfp_fw_ptr);
2603         if (r) {
2604                 dev_err(adev->dev, "(%d) failed to create pfp fw bo\n", r);
2605                 gfx_v11_0_pfp_fini(adev);
2606                 return r;
2607         }
2608
2609         memcpy(adev->gfx.pfp.pfp_fw_ptr, fw_data, fw_size);
2610
2611         amdgpu_bo_kunmap(adev->gfx.pfp.pfp_fw_obj);
2612         amdgpu_bo_unreserve(adev->gfx.pfp.pfp_fw_obj);
2613
2614         gfx_v11_0_config_pfp_cache(adev, adev->gfx.pfp.pfp_fw_gpu_addr);
2615
2616         WREG32_SOC15(GC, 0, regCP_HYP_PFP_UCODE_ADDR, 0);
2617
2618         for (i = 0; i < pfp_hdr->jt_size; i++)
2619                 WREG32_SOC15(GC, 0, regCP_HYP_PFP_UCODE_DATA,
2620                              le32_to_cpup(fw_data + pfp_hdr->jt_offset + i));
2621
2622         WREG32_SOC15(GC, 0, regCP_HYP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);
2623
2624         return 0;
2625 }
2626
2627 static int gfx_v11_0_cp_gfx_load_pfp_microcode_rs64(struct amdgpu_device *adev)
2628 {
2629         int r;
2630         const struct gfx_firmware_header_v2_0 *pfp_hdr;
2631         const __le32 *fw_ucode, *fw_data;
2632         unsigned i, pipe_id, fw_ucode_size, fw_data_size;
2633         uint32_t tmp;
2634         uint32_t usec_timeout = 50000;  /* wait for 50ms */
2635
2636         pfp_hdr = (const struct gfx_firmware_header_v2_0 *)
2637                 adev->gfx.pfp_fw->data;
2638
2639         amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
2640
2641         /* instruction */
2642         fw_ucode = (const __le32 *)(adev->gfx.pfp_fw->data +
2643                 le32_to_cpu(pfp_hdr->ucode_offset_bytes));
2644         fw_ucode_size = le32_to_cpu(pfp_hdr->ucode_size_bytes);
2645         /* data */
2646         fw_data = (const __le32 *)(adev->gfx.pfp_fw->data +
2647                 le32_to_cpu(pfp_hdr->data_offset_bytes));
2648         fw_data_size = le32_to_cpu(pfp_hdr->data_size_bytes);
2649
2650         /* 64kb align */
2651         r = amdgpu_bo_create_reserved(adev, fw_ucode_size,
2652                                       64 * 1024, AMDGPU_GEM_DOMAIN_VRAM,
2653                                       &adev->gfx.pfp.pfp_fw_obj,
2654                                       &adev->gfx.pfp.pfp_fw_gpu_addr,
2655                                       (void **)&adev->gfx.pfp.pfp_fw_ptr);
2656         if (r) {
2657                 dev_err(adev->dev, "(%d) failed to create pfp ucode fw bo\n", r);
2658                 gfx_v11_0_pfp_fini(adev);
2659                 return r;
2660         }
2661
2662         r = amdgpu_bo_create_reserved(adev, fw_data_size,
2663                                       64 * 1024, AMDGPU_GEM_DOMAIN_VRAM,
2664                                       &adev->gfx.pfp.pfp_fw_data_obj,
2665                                       &adev->gfx.pfp.pfp_fw_data_gpu_addr,
2666                                       (void **)&adev->gfx.pfp.pfp_fw_data_ptr);
2667         if (r) {
2668                 dev_err(adev->dev, "(%d) failed to create pfp data fw bo\n", r);
2669                 gfx_v11_0_pfp_fini(adev);
2670                 return r;
2671         }
2672
2673         memcpy(adev->gfx.pfp.pfp_fw_ptr, fw_ucode, fw_ucode_size);
2674         memcpy(adev->gfx.pfp.pfp_fw_data_ptr, fw_data, fw_data_size);
2675
2676         amdgpu_bo_kunmap(adev->gfx.pfp.pfp_fw_obj);
2677         amdgpu_bo_kunmap(adev->gfx.pfp.pfp_fw_data_obj);
2678         amdgpu_bo_unreserve(adev->gfx.pfp.pfp_fw_obj);
2679         amdgpu_bo_unreserve(adev->gfx.pfp.pfp_fw_data_obj);
2680
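        /*
         * In emulation, flush the HDP write cache so the firmware images
         * just copied by the CPU are visible to the GPU before the CP
         * starts fetching from them.
         */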
2681         if (amdgpu_emu_mode == 1)
2682                 adev->hdp.funcs->flush_hdp(adev, NULL);
2683
2684         WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_LO,
2685                 lower_32_bits(adev->gfx.pfp.pfp_fw_gpu_addr));
2686         WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_HI,
2687                 upper_32_bits(adev->gfx.pfp.pfp_fw_gpu_addr));
2688
2689         tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_CNTL);
2690         tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, VMID, 0);
2691         tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, CACHE_POLICY, 0);
2692         tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, EXE_DISABLE, 0);
2693         WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_CNTL, tmp);
2694
2695         /*
2696          * Programming any of the CP_PFP_IC_BASE registers
2697          * forces an invalidation of the PFP L1 I$. Wait for the
2698          * invalidation to complete.
2699          */
2700         for (i = 0; i < usec_timeout; i++) {
2701                 tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL);
2702                 if (1 == REG_GET_FIELD(tmp, CP_PFP_IC_OP_CNTL,
2703                         INVALIDATE_CACHE_COMPLETE))
2704                         break;
2705                 udelay(1);
2706         }
2707
2708         if (i >= usec_timeout) {
2709                 dev_err(adev->dev, "failed to invalidate instruction cache\n");
2710                 return -EINVAL;
2711         }
2712
2713         /* Prime the L1 instruction caches */
2714         tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL);
2715         tmp = REG_SET_FIELD(tmp, CP_PFP_IC_OP_CNTL, PRIME_ICACHE, 1);
2716         WREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL, tmp);
2717         /* Wait for the instruction cache to be primed */
2718         for (i = 0; i < usec_timeout; i++) {
2719                 tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL);
2720                 if (1 == REG_GET_FIELD(tmp, CP_PFP_IC_OP_CNTL,
2721                         ICACHE_PRIMED))
2722                         break;
2723                 udelay(1);
2724         }
2725
2726         if (i >= usec_timeout) {
2727                 dev_err(adev->dev, "failed to prime instruction cache\n");
2728                 return -EINVAL;
2729         }
2730
2731         mutex_lock(&adev->srbm_mutex);
2732         for (pipe_id = 0; pipe_id < adev->gfx.me.num_pipe_per_me; pipe_id++) {
2733                 soc21_grbm_select(adev, 0, pipe_id, 0, 0);
2734                 WREG32_SOC15(GC, 0, regCP_PFP_PRGRM_CNTR_START,
2735                         (pfp_hdr->ucode_start_addr_hi << 30) |
2736                         (pfp_hdr->ucode_start_addr_lo >> 2));
2737                 WREG32_SOC15(GC, 0, regCP_PFP_PRGRM_CNTR_START_HI,
2738                         pfp_hdr->ucode_start_addr_hi >> 2);
2739
2740                 /*
2741                  * Reset the selected PFP pipe via CP_ME_CNTL so that the
2742                  * new CP_PFP_PRGRM_CNTR_START value takes effect.
2743                  */
2744                 tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL);
2745                 if (pipe_id == 0)
2746                         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
2747                                         PFP_PIPE0_RESET, 1);
2748                 else
2749                         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
2750                                         PFP_PIPE1_RESET, 1);
2751                 WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
2752
2753                 /* Clear the reset bit for the selected pfp pipe. */
2754                 if (pipe_id == 0)
2755                         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
2756                                         PFP_PIPE0_RESET, 0);
2757                 else
2758                         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
2759                                         PFP_PIPE1_RESET, 0);
2760                 WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
2761
2762                 WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE0_LO,
2763                         lower_32_bits(adev->gfx.pfp.pfp_fw_data_gpu_addr));
2764                 WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE0_HI,
2765                         upper_32_bits(adev->gfx.pfp.pfp_fw_data_gpu_addr));
2766         }
2767         soc21_grbm_select(adev, 0, 0, 0, 0);
2768         mutex_unlock(&adev->srbm_mutex);
2769
2770         tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL);
2771         tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, VMID, 0);
2772         tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, CACHE_POLICY, 0);
2773         WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL, tmp);
2774
2775         /* Invalidate the data caches */
2776         tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL);
2777         tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL, INVALIDATE_DCACHE, 1);
2778         WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL, tmp);
2779
2780         for (i = 0; i < usec_timeout; i++) {
2781                 tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL);
2782                 if (1 == REG_GET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL,
2783                         INVALIDATE_DCACHE_COMPLETE))
2784                         break;
2785                 udelay(1);
2786         }
2787
2788         if (i >= usec_timeout) {
2789                 dev_err(adev->dev, "failed to invalidate RS64 data cache\n");
2790                 return -EINVAL;
2791         }
2792
2793         return 0;
2794 }
2795
2796 static int gfx_v11_0_cp_gfx_load_me_microcode(struct amdgpu_device *adev)
2797 {
2798         int r;
2799         const struct gfx_firmware_header_v1_0 *me_hdr;
2800         const __le32 *fw_data;
2801         unsigned i, fw_size;
2802
2803         me_hdr = (const struct gfx_firmware_header_v1_0 *)
2804                 adev->gfx.me_fw->data;
2805
2806         amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
2807
2808         fw_data = (const __le32 *)(adev->gfx.me_fw->data +
2809                 le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
2810         fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes);
2811
2812         r = amdgpu_bo_create_reserved(adev, me_hdr->header.ucode_size_bytes,
2813                                       PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
2814                                       &adev->gfx.me.me_fw_obj,
2815                                       &adev->gfx.me.me_fw_gpu_addr,
2816                                       (void **)&adev->gfx.me.me_fw_ptr);
2817         if (r) {
2818                 dev_err(adev->dev, "(%d) failed to create me fw bo\n", r);
2819                 gfx_v11_0_me_fini(adev);
2820                 return r;
2821         }
2822
2823         memcpy(adev->gfx.me.me_fw_ptr, fw_data, fw_size);
2824
2825         amdgpu_bo_kunmap(adev->gfx.me.me_fw_obj);
2826         amdgpu_bo_unreserve(adev->gfx.me.me_fw_obj);
2827
2828         gfx_v11_0_config_me_cache(adev, adev->gfx.me.me_fw_gpu_addr);
2829
2830         WREG32_SOC15(GC, 0, regCP_HYP_ME_UCODE_ADDR, 0);
2831
2832         for (i = 0; i < me_hdr->jt_size; i++)
2833                 WREG32_SOC15(GC, 0, regCP_HYP_ME_UCODE_DATA,
2834                              le32_to_cpup(fw_data + me_hdr->jt_offset + i));
2835
2836         WREG32_SOC15(GC, 0, regCP_HYP_ME_UCODE_ADDR, adev->gfx.me_fw_version);
2837
2838         return 0;
2839 }
2840
2841 static int gfx_v11_0_cp_gfx_load_me_microcode_rs64(struct amdgpu_device *adev)
2842 {
2843         int r;
2844         const struct gfx_firmware_header_v2_0 *me_hdr;
2845         const __le32 *fw_ucode, *fw_data;
2846         unsigned i, pipe_id, fw_ucode_size, fw_data_size;
2847         uint32_t tmp;
2848         uint32_t usec_timeout = 50000;  /* wait for 50ms */
2849
2850         me_hdr = (const struct gfx_firmware_header_v2_0 *)
2851                 adev->gfx.me_fw->data;
2852
2853         amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
2854
2855         /* instruction */
2856         fw_ucode = (const __le32 *)(adev->gfx.me_fw->data +
2857                 le32_to_cpu(me_hdr->ucode_offset_bytes));
2858         fw_ucode_size = le32_to_cpu(me_hdr->ucode_size_bytes);
2859         /* data */
2860         fw_data = (const __le32 *)(adev->gfx.me_fw->data +
2861                 le32_to_cpu(me_hdr->data_offset_bytes));
2862         fw_data_size = le32_to_cpu(me_hdr->data_size_bytes);
2863
2864         /* 64kb align */
2865         r = amdgpu_bo_create_reserved(adev, fw_ucode_size,
2866                                       64 * 1024, AMDGPU_GEM_DOMAIN_VRAM,
2867                                       &adev->gfx.me.me_fw_obj,
2868                                       &adev->gfx.me.me_fw_gpu_addr,
2869                                       (void **)&adev->gfx.me.me_fw_ptr);
2870         if (r) {
2871                 dev_err(adev->dev, "(%d) failed to create me ucode bo\n", r);
2872                 gfx_v11_0_me_fini(adev);
2873                 return r;
2874         }
2875
2876         r = amdgpu_bo_create_reserved(adev, fw_data_size,
2877                                       64 * 1024, AMDGPU_GEM_DOMAIN_VRAM,
2878                                       &adev->gfx.me.me_fw_data_obj,
2879                                       &adev->gfx.me.me_fw_data_gpu_addr,
2880                                       (void **)&adev->gfx.me.me_fw_data_ptr);
2881         if (r) {
2882                 dev_err(adev->dev, "(%d) failed to create me data bo\n", r);
2883                 gfx_v11_0_me_fini(adev);
2884                 return r;
2885         }
2886
2887         memcpy(adev->gfx.me.me_fw_ptr, fw_ucode, fw_ucode_size);
2888         memcpy(adev->gfx.me.me_fw_data_ptr, fw_data, fw_data_size);
2889
2890         amdgpu_bo_kunmap(adev->gfx.me.me_fw_obj);
2891         amdgpu_bo_kunmap(adev->gfx.me.me_fw_data_obj);
2892         amdgpu_bo_unreserve(adev->gfx.me.me_fw_obj);
2893         amdgpu_bo_unreserve(adev->gfx.me.me_fw_data_obj);
2894
2895         if (amdgpu_emu_mode == 1)
2896                 adev->hdp.funcs->flush_hdp(adev, NULL);
2897
2898         WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_LO,
2899                 lower_32_bits(adev->gfx.me.me_fw_gpu_addr));
2900         WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_HI,
2901                 upper_32_bits(adev->gfx.me.me_fw_gpu_addr));
2902
2903         tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_BASE_CNTL);
2904         tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, VMID, 0);
2905         tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, CACHE_POLICY, 0);
2906         tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, EXE_DISABLE, 0);
2907         WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_CNTL, tmp);
2908
2909         /*
2910          * Programming any of the CP_ME_IC_BASE registers
2911          * forces an invalidation of the ME L1 I$. Wait for the
2912          * invalidation to complete.
2913          */
2914         for (i = 0; i < usec_timeout; i++) {
2915                 tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL);
2916                 if (1 == REG_GET_FIELD(tmp, CP_ME_IC_OP_CNTL,
2917                         INVALIDATE_CACHE_COMPLETE))
2918                         break;
2919                 udelay(1);
2920         }
2921
2922         if (i >= usec_timeout) {
2923                 dev_err(adev->dev, "failed to invalidate instruction cache\n");
2924                 return -EINVAL;
2925         }
2926
2927         /* Prime the instruction caches */
2928         tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL);
2929         tmp = REG_SET_FIELD(tmp, CP_ME_IC_OP_CNTL, PRIME_ICACHE, 1);
2930         WREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL, tmp);
2931
2932         /* Wait for the instruction cache to be primed */
2933         for (i = 0; i < usec_timeout; i++) {
2934                 tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL);
2935                 if (1 == REG_GET_FIELD(tmp, CP_ME_IC_OP_CNTL,
2936                         ICACHE_PRIMED))
2937                         break;
2938                 udelay(1);
2939         }
2940
2941         if (i >= usec_timeout) {
2942                 dev_err(adev->dev, "failed to prime instruction cache\n");
2943                 return -EINVAL;
2944         }
2945
2946         mutex_lock(&adev->srbm_mutex);
2947         for (pipe_id = 0; pipe_id < adev->gfx.me.num_pipe_per_me; pipe_id++) {
2948                 soc21_grbm_select(adev, 0, pipe_id, 0, 0);
2949                 WREG32_SOC15(GC, 0, regCP_ME_PRGRM_CNTR_START,
2950                         (me_hdr->ucode_start_addr_hi << 30) |
2951                         (me_hdr->ucode_start_addr_lo >> 2));
2952                 WREG32_SOC15(GC, 0, regCP_ME_PRGRM_CNTR_START_HI,
2953                         me_hdr->ucode_start_addr_hi >> 2);
2954
2955                 /*
2956                  * Reset the selected ME pipe via CP_ME_CNTL so that the
2957                  * new CP_ME_PRGRM_CNTR_START value takes effect.
2958                  */
2959                 tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL);
2960                 if (pipe_id == 0)
2961                         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
2962                                         ME_PIPE0_RESET, 1);
2963                 else
2964                         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
2965                                         ME_PIPE1_RESET, 1);
2966                 WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
2967
2968                 /* Clear the reset bit for the selected me pipe. */
2969                 if (pipe_id == 0)
2970                         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
2971                                         ME_PIPE0_RESET, 0);
2972                 else
2973                         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
2974                                         ME_PIPE1_RESET, 0);
2975                 WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
2976
2977                 WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE1_LO,
2978                         lower_32_bits(adev->gfx.me.me_fw_data_gpu_addr));
2979                 WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE1_HI,
2980                         upper_32_bits(adev->gfx.me.me_fw_data_gpu_addr));
2981         }
2982         soc21_grbm_select(adev, 0, 0, 0, 0);
2983         mutex_unlock(&adev->srbm_mutex);
2984
2985         tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL);
2986         tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, VMID, 0);
2987         tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, CACHE_POLICY, 0);
2988         WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL, tmp);
2989
2990         /* Invalidate the data caches */
2991         tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL);
2992         tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL, INVALIDATE_DCACHE, 1);
2993         WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL, tmp);
2994
2995         for (i = 0; i < usec_timeout; i++) {
2996                 tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL);
2997                 if (1 == REG_GET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL,
2998                         INVALIDATE_DCACHE_COMPLETE))
2999                         break;
3000                 udelay(1);
3001         }
3002
3003         if (i >= usec_timeout) {
3004                 dev_err(adev->dev, "failed to invalidate RS64 data cache\n");
3005                 return -EINVAL;
3006         }
3007
3008         return 0;
3009 }
3010
3011 static int gfx_v11_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
3012 {
3013         int r;
3014
3015         if (!adev->gfx.me_fw || !adev->gfx.pfp_fw)
3016                 return -EINVAL;
3017
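        /*
         * Halt PFP and ME before swapping their microcode. RS64-capable
         * parts use the v2 firmware header path, other parts the legacy
         * v1 path.
         */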
3018         gfx_v11_0_cp_gfx_enable(adev, false);
3019
3020         if (adev->gfx.rs64_enable)
3021                 r = gfx_v11_0_cp_gfx_load_pfp_microcode_rs64(adev);
3022         else
3023                 r = gfx_v11_0_cp_gfx_load_pfp_microcode(adev);
3024         if (r) {
3025                 dev_err(adev->dev, "(%d) failed to load pfp fw\n", r);
3026                 return r;
3027         }
3028
3029         if (adev->gfx.rs64_enable)
3030                 r = gfx_v11_0_cp_gfx_load_me_microcode_rs64(adev);
3031         else
3032                 r = gfx_v11_0_cp_gfx_load_me_microcode(adev);
3033         if (r) {
3034                 dev_err(adev->dev, "(%d) failed to load me fw\n", r);
3035                 return r;
3036         }
3037
3038         return 0;
3039 }
3040
3041 static int gfx_v11_0_cp_gfx_start(struct amdgpu_device *adev)
3042 {
3043         struct amdgpu_ring *ring;
3044         const struct cs_section_def *sect = NULL;
3045         const struct cs_extent_def *ext = NULL;
3046         int r, i;
3047         int ctx_reg_offset;
3048
3049         /* init the CP */
3050         WREG32_SOC15(GC, 0, regCP_MAX_CONTEXT,
3051                      adev->gfx.config.max_hw_contexts - 1);
3052         WREG32_SOC15(GC, 0, regCP_DEVICE_ID, 1);
3053
3054         if (!amdgpu_async_gfx_ring)
3055                 gfx_v11_0_cp_gfx_enable(adev, true);
3056
3057         ring = &adev->gfx.gfx_ring[0];
3058         r = amdgpu_ring_alloc(ring, gfx_v11_0_get_csb_size(adev));
3059         if (r) {
3060                 DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
3061                 return r;
3062         }
3063
3064         amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3065         amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
3066
3067         amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
3068         amdgpu_ring_write(ring, 0x80000000);
3069         amdgpu_ring_write(ring, 0x80000000);
3070
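        /*
         * Replay the clear-state buffer: every SECT_CONTEXT extent from
         * gfx11_cs_data is emitted as a SET_CONTEXT_REG packet so the CP
         * starts from a known context register state.
         */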
3071         for (sect = gfx11_cs_data; sect->section != NULL; ++sect) {
3072                 for (ext = sect->section; ext->extent != NULL; ++ext) {
3073                         if (sect->id == SECT_CONTEXT) {
3074                                 amdgpu_ring_write(ring,
3075                                                   PACKET3(PACKET3_SET_CONTEXT_REG,
3076                                                           ext->reg_count));
3077                                 amdgpu_ring_write(ring, ext->reg_index -
3078                                                   PACKET3_SET_CONTEXT_REG_START);
3079                                 for (i = 0; i < ext->reg_count; i++)
3080                                         amdgpu_ring_write(ring, ext->extent[i]);
3081                         }
3082                 }
3083         }
3084
3085         ctx_reg_offset =
3086                 SOC15_REG_OFFSET(GC, 0, regPA_SC_TILE_STEERING_OVERRIDE) - PACKET3_SET_CONTEXT_REG_START;
3087         amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 1));
3088         amdgpu_ring_write(ring, ctx_reg_offset);
3089         amdgpu_ring_write(ring, adev->gfx.config.pa_sc_tile_steering_override);
3090
3091         amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3092         amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
3093
3094         amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
3095         amdgpu_ring_write(ring, 0);
3096
3097         amdgpu_ring_commit(ring);
3098
3099         /* submit cs packet to copy state 0 to next available state */
3100         if (adev->gfx.num_gfx_rings > 1) {
3101                 /* maximum supported gfx ring is 2 */
3102                 ring = &adev->gfx.gfx_ring[1];
3103                 r = amdgpu_ring_alloc(ring, 2);
3104                 if (r) {
3105                         DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
3106                         return r;
3107                 }
3108
3109                 amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
3110                 amdgpu_ring_write(ring, 0);
3111
3112                 amdgpu_ring_commit(ring);
3113         }
3114         return 0;
3115 }
3116
3117 static void gfx_v11_0_cp_gfx_switch_pipe(struct amdgpu_device *adev,
3118                                          CP_PIPE_ID pipe)
3119 {
3120         u32 tmp;
3121
3122         tmp = RREG32_SOC15(GC, 0, regGRBM_GFX_CNTL);
3123         tmp = REG_SET_FIELD(tmp, GRBM_GFX_CNTL, PIPEID, pipe);
3124
3125         WREG32_SOC15(GC, 0, regGRBM_GFX_CNTL, tmp);
3126 }
3127
3128 static void gfx_v11_0_cp_gfx_set_doorbell(struct amdgpu_device *adev,
3129                                           struct amdgpu_ring *ring)
3130 {
3131         u32 tmp;
3132
3133         tmp = RREG32_SOC15(GC, 0, regCP_RB_DOORBELL_CONTROL);
3134         if (ring->use_doorbell) {
3135                 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3136                                     DOORBELL_OFFSET, ring->doorbell_index);
3137                 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3138                                     DOORBELL_EN, 1);
3139         } else {
3140                 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3141                                     DOORBELL_EN, 0);
3142         }
3143         WREG32_SOC15(GC, 0, regCP_RB_DOORBELL_CONTROL, tmp);
3144
3145         tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
3146                             DOORBELL_RANGE_LOWER, ring->doorbell_index);
3147         WREG32_SOC15(GC, 0, regCP_RB_DOORBELL_RANGE_LOWER, tmp);
3148
3149         WREG32_SOC15(GC, 0, regCP_RB_DOORBELL_RANGE_UPPER,
3150                      CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
3151 }
3152
3153 static int gfx_v11_0_cp_gfx_resume(struct amdgpu_device *adev)
3154 {
3155         struct amdgpu_ring *ring;
3156         u32 tmp;
3157         u32 rb_bufsz;
3158         u64 rb_addr, rptr_addr, wptr_gpu_addr;
3159         u32 i;
3160
3161         /* Set the write pointer delay */
3162         WREG32_SOC15(GC, 0, regCP_RB_WPTR_DELAY, 0);
3163
3164         /* set the RB to use vmid 0 */
3165         WREG32_SOC15(GC, 0, regCP_RB_VMID, 0);
3166
3167         /* Init gfx ring 0 for pipe 0 */
3168         mutex_lock(&adev->srbm_mutex);
3169         gfx_v11_0_cp_gfx_switch_pipe(adev, PIPE_ID0);
3170
3171         /* Set ring buffer size */
3172         ring = &adev->gfx.gfx_ring[0];
3173         rb_bufsz = order_base_2(ring->ring_size / 8);
3174         tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
3175         tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
3176         WREG32_SOC15(GC, 0, regCP_RB0_CNTL, tmp);
3177
3178         /* Initialize the ring buffer's write pointers */
3179         ring->wptr = 0;
3180         WREG32_SOC15(GC, 0, regCP_RB0_WPTR, lower_32_bits(ring->wptr));
3181         WREG32_SOC15(GC, 0, regCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
3182
3183         /* set the wb address whether it's enabled or not */
3184         rptr_addr = ring->rptr_gpu_addr;
3185         WREG32_SOC15(GC, 0, regCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
3186         WREG32_SOC15(GC, 0, regCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) &
3187                      CP_RB_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK);
3188
3189         wptr_gpu_addr = ring->wptr_gpu_addr;
3190         WREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_ADDR_LO,
3191                      lower_32_bits(wptr_gpu_addr));
3192         WREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_ADDR_HI,
3193                      upper_32_bits(wptr_gpu_addr));
3194
3195         mdelay(1);
3196         WREG32_SOC15(GC, 0, regCP_RB0_CNTL, tmp);
3197
3198         rb_addr = ring->gpu_addr >> 8;
3199         WREG32_SOC15(GC, 0, regCP_RB0_BASE, rb_addr);
3200         WREG32_SOC15(GC, 0, regCP_RB0_BASE_HI, upper_32_bits(rb_addr));
3201
3202         WREG32_SOC15(GC, 0, regCP_RB_ACTIVE, 1);
3203
3204         gfx_v11_0_cp_gfx_set_doorbell(adev, ring);
3205         mutex_unlock(&adev->srbm_mutex);
3206
3207         /* Init gfx ring 1 for pipe 1 */
3208         if (adev->gfx.num_gfx_rings > 1) {
3209                 mutex_lock(&adev->srbm_mutex);
3210                 gfx_v11_0_cp_gfx_switch_pipe(adev, PIPE_ID1);
3211                 /* maximum supported gfx ring is 2 */
3212                 ring = &adev->gfx.gfx_ring[1];
3213                 rb_bufsz = order_base_2(ring->ring_size / 8);
3214                 tmp = REG_SET_FIELD(0, CP_RB1_CNTL, RB_BUFSZ, rb_bufsz);
3215                 tmp = REG_SET_FIELD(tmp, CP_RB1_CNTL, RB_BLKSZ, rb_bufsz - 2);
3216                 WREG32_SOC15(GC, 0, regCP_RB1_CNTL, tmp);
3217                 /* Initialize the ring buffer's write pointers */
3218                 ring->wptr = 0;
3219                 WREG32_SOC15(GC, 0, regCP_RB1_WPTR, lower_32_bits(ring->wptr));
3220                 WREG32_SOC15(GC, 0, regCP_RB1_WPTR_HI, upper_32_bits(ring->wptr));
3221                 /* Set the wb address whether it's enabled or not */
3222                 rptr_addr = ring->rptr_gpu_addr;
3223                 WREG32_SOC15(GC, 0, regCP_RB1_RPTR_ADDR, lower_32_bits(rptr_addr));
3224                 WREG32_SOC15(GC, 0, regCP_RB1_RPTR_ADDR_HI, upper_32_bits(rptr_addr) &
3225                              CP_RB1_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK);
3226                 wptr_gpu_addr = ring->wptr_gpu_addr;
3227                 WREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_ADDR_LO,
3228                              lower_32_bits(wptr_gpu_addr));
3229                 WREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_ADDR_HI,
3230                              upper_32_bits(wptr_gpu_addr));
3231
3232                 mdelay(1);
3233                 WREG32_SOC15(GC, 0, regCP_RB1_CNTL, tmp);
3234
3235                 rb_addr = ring->gpu_addr >> 8;
3236                 WREG32_SOC15(GC, 0, regCP_RB1_BASE, rb_addr);
3237                 WREG32_SOC15(GC, 0, regCP_RB1_BASE_HI, upper_32_bits(rb_addr));
3238                 WREG32_SOC15(GC, 0, regCP_RB1_ACTIVE, 1);
3239
3240                 gfx_v11_0_cp_gfx_set_doorbell(adev, ring);
3241                 mutex_unlock(&adev->srbm_mutex);
3242         }
3243         /* Switch to pipe 0 */
3244         mutex_lock(&adev->srbm_mutex);
3245         gfx_v11_0_cp_gfx_switch_pipe(adev, PIPE_ID0);
3246         mutex_unlock(&adev->srbm_mutex);
3247
3248         /* start the ring */
3249         gfx_v11_0_cp_gfx_start(adev);
3250
3251         for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
3252                 ring = &adev->gfx.gfx_ring[i];
3253                 ring->sched.ready = true;
3254         }
3255
3256         return 0;
3257 }
3258
3259 static void gfx_v11_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
3260 {
3261         u32 data;
3262
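        /*
         * RS64-based MEC firmware is controlled through CP_MEC_RS64_CNTL
         * (per-pipe reset/active bits plus a halt bit); the legacy MEC
         * path only toggles the ME1/ME2 halt bits in CP_MEC_CNTL.
         */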
3263         if (adev->gfx.rs64_enable) {
3264                 data = RREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL);
3265                 data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_INVALIDATE_ICACHE,
3266                                                          enable ? 0 : 1);
3267                 data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE0_RESET,
3268                                                          enable ? 0 : 1);
3269                 data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE1_RESET,
3270                                                          enable ? 0 : 1);
3271                 data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE2_RESET,
3272                                                          enable ? 0 : 1);
3273                 data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE3_RESET,
3274                                                          enable ? 0 : 1);
3275                 data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE0_ACTIVE,
3276                                                          enable ? 1 : 0);
3277                 data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE1_ACTIVE,
3278                                                          enable ? 1 : 0);
3279                 data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE2_ACTIVE,
3280                                                          enable ? 1 : 0);
3281                 data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE3_ACTIVE,
3282                                                          enable ? 1 : 0);
3283                 data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_HALT,
3284                                                          enable ? 0 : 1);
3285                 WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, data);
3286         } else {
3287                 data = RREG32_SOC15(GC, 0, regCP_MEC_CNTL);
3288
3289                 if (enable) {
3290                         data = REG_SET_FIELD(data, CP_MEC_CNTL, MEC_ME1_HALT, 0);
3291                         if (!adev->enable_mes_kiq)
3292                                 data = REG_SET_FIELD(data, CP_MEC_CNTL,
3293                                                      MEC_ME2_HALT, 0);
3294                 } else {
3295                         data = REG_SET_FIELD(data, CP_MEC_CNTL, MEC_ME1_HALT, 1);
3296                         data = REG_SET_FIELD(data, CP_MEC_CNTL, MEC_ME2_HALT, 1);
3297                 }
3298                 WREG32_SOC15(GC, 0, regCP_MEC_CNTL, data);
3299         }
3300
3301         adev->gfx.kiq.ring.sched.ready = enable;
3302
3303         udelay(50);
3304 }
3305
3306 static int gfx_v11_0_cp_compute_load_microcode(struct amdgpu_device *adev)
3307 {
3308         const struct gfx_firmware_header_v1_0 *mec_hdr;
3309         const __le32 *fw_data;
3310         unsigned i, fw_size;
3311         u32 *fw = NULL;
3312         int r;
3313
3314         if (!adev->gfx.mec_fw)
3315                 return -EINVAL;
3316
3317         gfx_v11_0_cp_compute_enable(adev, false);
3318
3319         mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
3320         amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
3321
3322         fw_data = (const __le32 *)
3323                 (adev->gfx.mec_fw->data +
3324                  le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
3325         fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes);
3326
3327         r = amdgpu_bo_create_reserved(adev, mec_hdr->header.ucode_size_bytes,
3328                                           PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
3329                                           &adev->gfx.mec.mec_fw_obj,
3330                                           &adev->gfx.mec.mec_fw_gpu_addr,
3331                                           (void **)&fw);
3332         if (r) {
3333                 dev_err(adev->dev, "(%d) failed to create mec fw bo\n", r);
3334                 gfx_v11_0_mec_fini(adev);
3335                 return r;
3336         }
3337
3338         memcpy(fw, fw_data, fw_size);
3339
3340         amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj);
3341         amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj);
3342
3343         gfx_v11_0_config_mec_cache(adev, adev->gfx.mec.mec_fw_gpu_addr);
3344
3345         /* MEC1 */
3346         WREG32_SOC15(GC, 0, regCP_MEC_ME1_UCODE_ADDR, 0);
3347
3348         for (i = 0; i < mec_hdr->jt_size; i++)
3349                 WREG32_SOC15(GC, 0, regCP_MEC_ME1_UCODE_DATA,
3350                              le32_to_cpup(fw_data + mec_hdr->jt_offset + i));
3351
3352         WREG32_SOC15(GC, 0, regCP_MEC_ME1_UCODE_ADDR, adev->gfx.mec_fw_version);
3353
3354         return 0;
3355 }
3356
3357 static int gfx_v11_0_cp_compute_load_microcode_rs64(struct amdgpu_device *adev)
3358 {
3359         const struct gfx_firmware_header_v2_0 *mec_hdr;
3360         const __le32 *fw_ucode, *fw_data;
3361         u32 tmp, fw_ucode_size, fw_data_size;
3362         u32 i, usec_timeout = 50000; /* Wait for 50 ms */
3363         u32 *fw_ucode_ptr, *fw_data_ptr;
3364         int r;
3365
3366         if (!adev->gfx.mec_fw)
3367                 return -EINVAL;
3368
3369         gfx_v11_0_cp_compute_enable(adev, false);
3370
3371         mec_hdr = (const struct gfx_firmware_header_v2_0 *)adev->gfx.mec_fw->data;
3372         amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
3373
3374         fw_ucode = (const __le32 *) (adev->gfx.mec_fw->data +
3375                                 le32_to_cpu(mec_hdr->ucode_offset_bytes));
3376         fw_ucode_size = le32_to_cpu(mec_hdr->ucode_size_bytes);
3377
3378         fw_data = (const __le32 *) (adev->gfx.mec_fw->data +
3379                                 le32_to_cpu(mec_hdr->data_offset_bytes));
3380         fw_data_size = le32_to_cpu(mec_hdr->data_size_bytes);
3381
3382         r = amdgpu_bo_create_reserved(adev, fw_ucode_size,
3383                                       64 * 1024, AMDGPU_GEM_DOMAIN_VRAM,
3384                                       &adev->gfx.mec.mec_fw_obj,
3385                                       &adev->gfx.mec.mec_fw_gpu_addr,
3386                                       (void **)&fw_ucode_ptr);
3387         if (r) {
3388                 dev_err(adev->dev, "(%d) failed to create mec fw ucode bo\n", r);
3389                 gfx_v11_0_mec_fini(adev);
3390                 return r;
3391         }
3392
3393         r = amdgpu_bo_create_reserved(adev, fw_data_size,
3394                                       64 * 1024, AMDGPU_GEM_DOMAIN_VRAM,
3395                                       &adev->gfx.mec.mec_fw_data_obj,
3396                                       &adev->gfx.mec.mec_fw_data_gpu_addr,
3397                                       (void **)&fw_data_ptr);
3398         if (r) {
3399                 dev_err(adev->dev, "(%d) failed to create mec fw data bo\n", r);
3400                 gfx_v11_0_mec_fini(adev);
3401                 return r;
3402         }
3403
3404         memcpy(fw_ucode_ptr, fw_ucode, fw_ucode_size);
3405         memcpy(fw_data_ptr, fw_data, fw_data_size);
3406
3407         amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj);
3408         amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_data_obj);
3409         amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj);
3410         amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_data_obj);
3411
3412         tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_CNTL);
3413         tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, VMID, 0);
3414         tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, EXE_DISABLE, 0);
3415         tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0);
3416         WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_CNTL, tmp);
3417
3418         tmp = RREG32_SOC15(GC, 0, regCP_MEC_DC_BASE_CNTL);
3419         tmp = REG_SET_FIELD(tmp, CP_MEC_DC_BASE_CNTL, VMID, 0);
3420         tmp = REG_SET_FIELD(tmp, CP_MEC_DC_BASE_CNTL, CACHE_POLICY, 0);
3421         WREG32_SOC15(GC, 0, regCP_MEC_DC_BASE_CNTL, tmp);
3422
3423         mutex_lock(&adev->srbm_mutex);
3424         for (i = 0; i < adev->gfx.mec.num_pipe_per_mec; i++) {
3425                 soc21_grbm_select(adev, 1, i, 0, 0);
3426
3427                 WREG32_SOC15(GC, 0, regCP_MEC_MDBASE_LO, adev->gfx.mec.mec_fw_data_gpu_addr);
3428                 WREG32_SOC15(GC, 0, regCP_MEC_MDBASE_HI,
3429                      upper_32_bits(adev->gfx.mec.mec_fw_data_gpu_addr));
3430
3431                 WREG32_SOC15(GC, 0, regCP_MEC_RS64_PRGRM_CNTR_START,
3432                                         mec_hdr->ucode_start_addr_lo >> 2 |
3433                                         mec_hdr->ucode_start_addr_hi << 30);
3434                 WREG32_SOC15(GC, 0, regCP_MEC_RS64_PRGRM_CNTR_START_HI,
3435                                         mec_hdr->ucode_start_addr_hi >> 2);
3436
3437                 WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_LO, adev->gfx.mec.mec_fw_gpu_addr);
3438                 WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_HI,
3439                      upper_32_bits(adev->gfx.mec.mec_fw_gpu_addr));
3440         }
3441         soc21_grbm_select(adev, 0, 0, 0, 0);
3442         mutex_unlock(&adev->srbm_mutex);
3443
3444         /* Trigger an invalidation of the MEC data caches */
3445         tmp = RREG32_SOC15(GC, 0, regCP_MEC_DC_OP_CNTL);
3446         tmp = REG_SET_FIELD(tmp, CP_MEC_DC_OP_CNTL, INVALIDATE_DCACHE, 1);
3447         WREG32_SOC15(GC, 0, regCP_MEC_DC_OP_CNTL, tmp);
3448
3449         /* Wait for invalidation complete */
3450         for (i = 0; i < usec_timeout; i++) {
3451                 tmp = RREG32_SOC15(GC, 0, regCP_MEC_DC_OP_CNTL);
3452                 if (1 == REG_GET_FIELD(tmp, CP_MEC_DC_OP_CNTL,
3453                                        INVALIDATE_DCACHE_COMPLETE))
3454                         break;
3455                 udelay(1);
3456         }
3457
3458         if (i >= usec_timeout) {
3459                 dev_err(adev->dev, "failed to invalidate MEC data cache\n");
3460                 return -EINVAL;
3461         }
3462
3463         /* Trigger an invalidation of the L1 instruction caches */
3464         tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL);
3465         tmp = REG_SET_FIELD(tmp, CP_CPC_IC_OP_CNTL, INVALIDATE_CACHE, 1);
3466         WREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL, tmp);
3467
3468         /* Wait for invalidation complete */
3469         for (i = 0; i < usec_timeout; i++) {
3470                 tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL);
3471                 if (1 == REG_GET_FIELD(tmp, CP_CPC_IC_OP_CNTL,
3472                                        INVALIDATE_CACHE_COMPLETE))
3473                         break;
3474                 udelay(1);
3475         }
3476
3477         if (i >= usec_timeout) {
3478                 dev_err(adev->dev, "failed to invalidate instruction cache\n");
3479                 return -EINVAL;
3480         }
3481
3482         return 0;
3483 }
3484
3485 static void gfx_v11_0_kiq_setting(struct amdgpu_ring *ring)
3486 {
3487         uint32_t tmp;
3488         struct amdgpu_device *adev = ring->adev;
3489
3490         /* tell RLC which is KIQ queue */
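        /*
         * The low byte of RLC_CP_SCHEDULERS encodes the KIQ location as
         * (me << 5) | (pipe << 3) | queue; bit 7 is set in a second write,
         * which appears to activate the selection.
         */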
3491         tmp = RREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS);
3492         tmp &= 0xffffff00;
3493         tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
3494         WREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS, tmp);
3495         tmp |= 0x80;
3496         WREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS, tmp);
3497 }
3498
3499 static void gfx_v11_0_cp_set_doorbell_range(struct amdgpu_device *adev)
3500 {
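        /*
         * The doorbell_index values below are in 64-bit doorbell units;
         * "* 2" converts them to dword units and "<< 2" to the byte
         * offsets the DOORBELL_RANGE registers take.
         */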
3501         /* set graphics engine doorbell range */
3502         WREG32_SOC15(GC, 0, regCP_RB_DOORBELL_RANGE_LOWER,
3503                      (adev->doorbell_index.gfx_ring0 * 2) << 2);
3504         WREG32_SOC15(GC, 0, regCP_RB_DOORBELL_RANGE_UPPER,
3505                      (adev->doorbell_index.gfx_userqueue_end * 2) << 2);
3506
3507         /* set compute engine doorbell range */
3508         WREG32_SOC15(GC, 0, regCP_MEC_DOORBELL_RANGE_LOWER,
3509                      (adev->doorbell_index.kiq * 2) << 2);
3510         WREG32_SOC15(GC, 0, regCP_MEC_DOORBELL_RANGE_UPPER,
3511                      (adev->doorbell_index.userqueue_end * 2) << 2);
3512 }
3513
3514 static int gfx_v11_0_gfx_mqd_init(struct amdgpu_device *adev, void *m,
3515                                   struct amdgpu_mqd_prop *prop)
3516 {
3517         struct v11_gfx_mqd *mqd = m;
3518         uint64_t hqd_gpu_addr, wb_gpu_addr;
3519         uint32_t tmp;
3520         uint32_t rb_bufsz;
3521
3522         /* set up gfx hqd wptr */
3523         mqd->cp_gfx_hqd_wptr = 0;
3524         mqd->cp_gfx_hqd_wptr_hi = 0;
3525
3526         /* set the pointer to the MQD */
3527         mqd->cp_mqd_base_addr = prop->mqd_gpu_addr & 0xfffffffc;
3528         mqd->cp_mqd_base_addr_hi = upper_32_bits(prop->mqd_gpu_addr);
3529
3530         /* set up mqd control */
3531         tmp = RREG32_SOC15(GC, 0, regCP_GFX_MQD_CONTROL);
3532         tmp = REG_SET_FIELD(tmp, CP_GFX_MQD_CONTROL, VMID, 0);
3533         tmp = REG_SET_FIELD(tmp, CP_GFX_MQD_CONTROL, PRIV_STATE, 1);
3534         tmp = REG_SET_FIELD(tmp, CP_GFX_MQD_CONTROL, CACHE_POLICY, 0);
3535         mqd->cp_gfx_mqd_control = tmp;
3536
3537         /* set up gfx_hqd_vmid with 0x0 to indicate the ring buffer's vmid */
3538         tmp = RREG32_SOC15(GC, 0, regCP_GFX_HQD_VMID);
3539         tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_VMID, VMID, 0);
3540         mqd->cp_gfx_hqd_vmid = 0;
3541
3542         /* set up default queue priority level
3543          * 0x0 = low priority, 0x1 = high priority */
3544         tmp = RREG32_SOC15(GC, 0, regCP_GFX_HQD_QUEUE_PRIORITY);
3545         tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_QUEUE_PRIORITY, PRIORITY_LEVEL, 0);
3546         mqd->cp_gfx_hqd_queue_priority = tmp;
3547
3548         /* set up time quantum */
3549         tmp = RREG32_SOC15(GC, 0, regCP_GFX_HQD_QUANTUM);
3550         tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_QUANTUM, QUANTUM_EN, 1);
3551         mqd->cp_gfx_hqd_quantum = tmp;
3552
3553         /* set up gfx hqd base. this is similar to CP_RB_BASE */
3554         hqd_gpu_addr = prop->hqd_base_gpu_addr >> 8;
3555         mqd->cp_gfx_hqd_base = hqd_gpu_addr;
3556         mqd->cp_gfx_hqd_base_hi = upper_32_bits(hqd_gpu_addr);
3557
3558         /* set up hqd_rptr_addr/_hi, similar to CP_RB_RPTR */
3559         wb_gpu_addr = prop->rptr_gpu_addr;
3560         mqd->cp_gfx_hqd_rptr_addr = wb_gpu_addr & 0xfffffffc;
3561         mqd->cp_gfx_hqd_rptr_addr_hi =
3562                 upper_32_bits(wb_gpu_addr) & 0xffff;
3563
3564         /* set up rb_wptr_poll addr */
3565         wb_gpu_addr = prop->wptr_gpu_addr;
3566         mqd->cp_rb_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
3567         mqd->cp_rb_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
3568
3569         /* set up the gfx_hqd_control, similar to CP_RB0_CNTL */
3570         rb_bufsz = order_base_2(prop->queue_size / 4) - 1;
3571         tmp = RREG32_SOC15(GC, 0, regCP_GFX_HQD_CNTL);
3572         tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, RB_BUFSZ, rb_bufsz);
3573         tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, RB_BLKSZ, rb_bufsz - 2);
3574 #ifdef __BIG_ENDIAN
3575         tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, BUF_SWAP, 1);
3576 #endif
3577         mqd->cp_gfx_hqd_cntl = tmp;
3578
3579         /* set up cp_doorbell_control */
3580         tmp = RREG32_SOC15(GC, 0, regCP_RB_DOORBELL_CONTROL);
3581         if (prop->use_doorbell) {
3582                 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3583                                     DOORBELL_OFFSET, prop->doorbell_index);
3584                 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3585                                     DOORBELL_EN, 1);
3586         } else
3587                 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3588                                     DOORBELL_EN, 0);
3589         mqd->cp_rb_doorbell_control = tmp;
3590
3591         /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3592         mqd->cp_gfx_hqd_rptr = RREG32_SOC15(GC, 0, regCP_GFX_HQD_RPTR);
3593
3594         /* activate the queue */
3595         mqd->cp_gfx_hqd_active = 1;
3596
3597         return 0;
3598 }
3599
3600 #ifdef BRING_UP_DEBUG
3601 static int gfx_v11_0_gfx_queue_init_register(struct amdgpu_ring *ring)
3602 {
3603         struct amdgpu_device *adev = ring->adev;
3604         struct v11_gfx_mqd *mqd = ring->mqd_ptr;
3605
3606         /* set mmCP_GFX_HQD_WPTR/_HI to 0 */
3607         WREG32_SOC15(GC, 0, regCP_GFX_HQD_WPTR, mqd->cp_gfx_hqd_wptr);
3608         WREG32_SOC15(GC, 0, regCP_GFX_HQD_WPTR_HI, mqd->cp_gfx_hqd_wptr_hi);
3609
3610         /* set GFX_MQD_BASE */
3611         WREG32_SOC15(GC, 0, regCP_MQD_BASE_ADDR, mqd->cp_mqd_base_addr);
3612         WREG32_SOC15(GC, 0, regCP_MQD_BASE_ADDR_HI, mqd->cp_mqd_base_addr_hi);
3613
3614         /* set GFX_MQD_CONTROL */
3615         WREG32_SOC15(GC, 0, regCP_GFX_MQD_CONTROL, mqd->cp_gfx_mqd_control);
3616
3617         /* set GFX_HQD_VMID to 0 */
3618         WREG32_SOC15(GC, 0, regCP_GFX_HQD_VMID, mqd->cp_gfx_hqd_vmid);
3619
3620         WREG32_SOC15(GC, 0, regCP_GFX_HQD_QUEUE_PRIORITY,
3621                         mqd->cp_gfx_hqd_queue_priority);
3622         WREG32_SOC15(GC, 0, regCP_GFX_HQD_QUANTUM, mqd->cp_gfx_hqd_quantum);
3623
3624         /* set GFX_HQD_BASE, similar to CP_RB_BASE */
3625         WREG32_SOC15(GC, 0, regCP_GFX_HQD_BASE, mqd->cp_gfx_hqd_base);
3626         WREG32_SOC15(GC, 0, regCP_GFX_HQD_BASE_HI, mqd->cp_gfx_hqd_base_hi);
3627
3628         /* set GFX_HQD_RPTR_ADDR, similar to CP_RB_RPTR */
3629         WREG32_SOC15(GC, 0, regCP_GFX_HQD_RPTR_ADDR, mqd->cp_gfx_hqd_rptr_addr);
3630         WREG32_SOC15(GC, 0, regCP_GFX_HQD_RPTR_ADDR_HI, mqd->cp_gfx_hqd_rptr_addr_hi);
3631
3632         /* set GFX_HQD_CNTL, similar to CP_RB_CNTL */
3633         WREG32_SOC15(GC, 0, regCP_GFX_HQD_CNTL, mqd->cp_gfx_hqd_cntl);
3634
3635         /* set RB_WPTR_POLL_ADDR */
3636         WREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_ADDR_LO, mqd->cp_rb_wptr_poll_addr_lo);
3637         WREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_ADDR_HI, mqd->cp_rb_wptr_poll_addr_hi);
3638
3639         /* set RB_DOORBELL_CONTROL */
3640         WREG32_SOC15(GC, 0, regCP_RB_DOORBELL_CONTROL, mqd->cp_rb_doorbell_control);
3641
3642         /* activate the queue */
3643         WREG32_SOC15(GC, 0, regCP_GFX_HQD_ACTIVE, mqd->cp_gfx_hqd_active);
3644
3645         return 0;
3646 }
3647 #endif
3648
3649 static int gfx_v11_0_gfx_init_queue(struct amdgpu_ring *ring)
3650 {
3651         struct amdgpu_device *adev = ring->adev;
3652         struct v11_gfx_mqd *mqd = ring->mqd_ptr;
3653         int mqd_idx = ring - &adev->gfx.gfx_ring[0];
3654
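        /*
         * Three cases: on first init build a fresh MQD and back it up, on
         * GPU reset restore the MQD from the backup and clear the ring,
         * and on resume from suspend just clear the ring and reuse the
         * MQD that is still in place.
         */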
3655         if (!amdgpu_in_reset(adev) && !adev->in_suspend) {
3656                 memset((void *)mqd, 0, sizeof(*mqd));
3657                 mutex_lock(&adev->srbm_mutex);
3658                 soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3659                 amdgpu_ring_init_mqd(ring);
3660 #ifdef BRING_UP_DEBUG
3661                 gfx_v11_0_gfx_queue_init_register(ring);
3662 #endif
3663                 soc21_grbm_select(adev, 0, 0, 0, 0);
3664                 mutex_unlock(&adev->srbm_mutex);
3665                 if (adev->gfx.me.mqd_backup[mqd_idx])
3666                         memcpy(adev->gfx.me.mqd_backup[mqd_idx], mqd, sizeof(*mqd));
3667         } else if (amdgpu_in_reset(adev)) {
3668                 /* reset mqd with the backup copy */
3669                 if (adev->gfx.me.mqd_backup[mqd_idx])
3670                         memcpy(mqd, adev->gfx.me.mqd_backup[mqd_idx], sizeof(*mqd));
3671                 /* reset the ring */
3672                 ring->wptr = 0;
3673                 *ring->wptr_cpu_addr = 0;
3674                 amdgpu_ring_clear_ring(ring);
3675 #ifdef BRING_UP_DEBUG
3676                 mutex_lock(&adev->srbm_mutex);
3677                 soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3678                 gfx_v11_0_gfx_queue_init_register(ring);
3679                 soc21_grbm_select(adev, 0, 0, 0, 0);
3680                 mutex_unlock(&adev->srbm_mutex);
3681 #endif
3682         } else {
3683                 amdgpu_ring_clear_ring(ring);
3684         }
3685
3686         return 0;
3687 }
3688
3689 #ifndef BRING_UP_DEBUG
3690 static int gfx_v11_0_kiq_enable_kgq(struct amdgpu_device *adev)
3691 {
3692         struct amdgpu_kiq *kiq = &adev->gfx.kiq;
3693         struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
3694         int r, i;
3695
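        /*
         * The kernel gfx queues are not programmed directly here; the KIQ
         * is asked to map them by emitting one map_queues packet per gfx
         * ring on the KIQ ring.
         */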
3696         if (!kiq->pmf || !kiq->pmf->kiq_map_queues)
3697                 return -EINVAL;
3698
3699         r = amdgpu_ring_alloc(kiq_ring, kiq->pmf->map_queues_size *
3700                                         adev->gfx.num_gfx_rings);
3701         if (r) {
3702                 DRM_ERROR("Failed to lock KIQ (%d).\n", r);
3703                 return r;
3704         }
3705
3706         for (i = 0; i < adev->gfx.num_gfx_rings; i++)
3707                 kiq->pmf->kiq_map_queues(kiq_ring, &adev->gfx.gfx_ring[i]);
3708
3709         return amdgpu_ring_test_helper(kiq_ring);
3710 }
3711 #endif
3712
3713 static int gfx_v11_0_cp_async_gfx_ring_resume(struct amdgpu_device *adev)
3714 {
3715         int r, i;
3716         struct amdgpu_ring *ring;
3717
3718         for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
3719                 ring = &adev->gfx.gfx_ring[i];
3720
3721                 r = amdgpu_bo_reserve(ring->mqd_obj, false);
3722                 if (unlikely(r != 0))
3723                         goto done;
3724
3725                 r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
3726                 if (!r) {
3727                         r = gfx_v11_0_gfx_init_queue(ring);
3728                         amdgpu_bo_kunmap(ring->mqd_obj);
3729                         ring->mqd_ptr = NULL;
3730                 }
3731                 amdgpu_bo_unreserve(ring->mqd_obj);
3732                 if (r)
3733                         goto done;
3734         }
3735 #ifndef BRING_UP_DEBUG
3736         r = gfx_v11_0_kiq_enable_kgq(adev);
3737         if (r)
3738                 goto done;
3739 #endif
3740         r = gfx_v11_0_cp_gfx_start(adev);
3741         if (r)
3742                 goto done;
3743
3744         for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
3745                 ring = &adev->gfx.gfx_ring[i];
3746                 ring->sched.ready = true;
3747         }
3748 done:
3749         return r;
3750 }
3751
3752 static int gfx_v11_0_compute_mqd_init(struct amdgpu_device *adev, void *m,
3753                                       struct amdgpu_mqd_prop *prop)
3754 {
3755         struct v11_compute_mqd *mqd = m;
3756         uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
3757         uint32_t tmp;
3758
3759         mqd->header = 0xC0310800;
3760         mqd->compute_pipelinestat_enable = 0x00000001;
3761         mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
3762         mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
3763         mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
3764         mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
3765         mqd->compute_misc_reserved = 0x00000007;
3766
3767         eop_base_addr = prop->eop_gpu_addr >> 8;
3768         mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
3769         mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
3770
3771         /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
3772         tmp = RREG32_SOC15(GC, 0, regCP_HQD_EOP_CONTROL);
3773         tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
3774                         (order_base_2(GFX11_MEC_HPD_SIZE / 4) - 1));
3775
3776         mqd->cp_hqd_eop_control = tmp;
3777
3778         /* enable doorbell? */
3779         tmp = RREG32_SOC15(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL);
3780
3781         if (prop->use_doorbell) {
3782                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3783                                     DOORBELL_OFFSET, prop->doorbell_index);
3784                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3785                                     DOORBELL_EN, 1);
3786                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3787                                     DOORBELL_SOURCE, 0);
3788                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3789                                     DOORBELL_HIT, 0);
3790         } else {
3791                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3792                                     DOORBELL_EN, 0);
3793         }
3794
3795         mqd->cp_hqd_pq_doorbell_control = tmp;
3796
3797         /* disable the queue if it's active */
3798         mqd->cp_hqd_dequeue_request = 0;
3799         mqd->cp_hqd_pq_rptr = 0;
3800         mqd->cp_hqd_pq_wptr_lo = 0;
3801         mqd->cp_hqd_pq_wptr_hi = 0;
3802
3803         /* set the pointer to the MQD */
3804         mqd->cp_mqd_base_addr_lo = prop->mqd_gpu_addr & 0xfffffffc;
3805         mqd->cp_mqd_base_addr_hi = upper_32_bits(prop->mqd_gpu_addr);
3806
3807         /* set MQD vmid to 0 */
3808         tmp = RREG32_SOC15(GC, 0, regCP_MQD_CONTROL);
3809         tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
3810         mqd->cp_mqd_control = tmp;
3811
3812         /* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
3813         hqd_gpu_addr = prop->hqd_base_gpu_addr >> 8;
3814         mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
3815         mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
3816
3817         /* set up the HQD, this is similar to CP_RB0_CNTL */
3818         tmp = RREG32_SOC15(GC, 0, regCP_HQD_PQ_CONTROL);
3819         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
3820                             (order_base_2(prop->queue_size / 4) - 1));
3821         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
3822                             (order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1));
3823         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
3824         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, TUNNEL_DISPATCH, 0);
3825         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
3826         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
3827         mqd->cp_hqd_pq_control = tmp;
3828
3829         /* set the wb address whether it's enabled or not */
3830         wb_gpu_addr = prop->rptr_gpu_addr;
3831         mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
3832         mqd->cp_hqd_pq_rptr_report_addr_hi =
3833                 upper_32_bits(wb_gpu_addr) & 0xffff;
3834
3835         /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
3836         wb_gpu_addr = prop->wptr_gpu_addr;
3837         mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
3838         mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
3839
3840         tmp = 0;
3841         /* enable the doorbell if requested */
3842         if (prop->use_doorbell) {
3843                 tmp = RREG32_SOC15(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL);
3844                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3845                                 DOORBELL_OFFSET, prop->doorbell_index);
3846
3847                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3848                                     DOORBELL_EN, 1);
3849                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3850                                     DOORBELL_SOURCE, 0);
3851                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3852                                     DOORBELL_HIT, 0);
3853         }
3854
3855         mqd->cp_hqd_pq_doorbell_control = tmp;
3856
3857         /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3858         mqd->cp_hqd_pq_rptr = RREG32_SOC15(GC, 0, regCP_HQD_PQ_RPTR);
3859
3860         /* set the vmid for the queue */
3861         mqd->cp_hqd_vmid = 0;
3862
3863         tmp = RREG32_SOC15(GC, 0, regCP_HQD_PERSISTENT_STATE);
3864         tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x55);
3865         mqd->cp_hqd_persistent_state = tmp;
3866
3867         /* set MIN_IB_AVAIL_SIZE */
3868         tmp = RREG32_SOC15(GC, 0, regCP_HQD_IB_CONTROL);
3869         tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
3870         mqd->cp_hqd_ib_control = tmp;
3871
3872         /* set static priority for a compute queue/ring */
3873         mqd->cp_hqd_pipe_priority = prop->hqd_pipe_priority;
3874         mqd->cp_hqd_queue_priority = prop->hqd_queue_priority;
3875
3876         mqd->cp_hqd_active = prop->hqd_active;
3877
3878         return 0;
3879 }
3880
3881 static int gfx_v11_0_kiq_init_register(struct amdgpu_ring *ring)
3882 {
3883         struct amdgpu_device *adev = ring->adev;
3884         struct v11_compute_mqd *mqd = ring->mqd_ptr;
3885         int j;
3886
3887         /* deactivate the queue */
3888         if (amdgpu_sriov_vf(adev))
3889                 WREG32_SOC15(GC, 0, regCP_HQD_ACTIVE, 0);
3890
3891         /* disable wptr polling */
3892         WREG32_FIELD15_PREREG(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
3893
3894         /* write the EOP addr */
3895         WREG32_SOC15(GC, 0, regCP_HQD_EOP_BASE_ADDR,
3896                mqd->cp_hqd_eop_base_addr_lo);
3897         WREG32_SOC15(GC, 0, regCP_HQD_EOP_BASE_ADDR_HI,
3898                mqd->cp_hqd_eop_base_addr_hi);
3899
3900         /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
3901         WREG32_SOC15(GC, 0, regCP_HQD_EOP_CONTROL,
3902                mqd->cp_hqd_eop_control);
3903
3904         /* enable doorbell? */
3905         WREG32_SOC15(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL,
3906                mqd->cp_hqd_pq_doorbell_control);
3907
3908         /* disable the queue if it's active */
3909         if (RREG32_SOC15(GC, 0, regCP_HQD_ACTIVE) & 1) {
3910                 WREG32_SOC15(GC, 0, regCP_HQD_DEQUEUE_REQUEST, 1);
3911                 for (j = 0; j < adev->usec_timeout; j++) {
3912                         if (!(RREG32_SOC15(GC, 0, regCP_HQD_ACTIVE) & 1))
3913                                 break;
3914                         udelay(1);
3915                 }
3916                 WREG32_SOC15(GC, 0, regCP_HQD_DEQUEUE_REQUEST,
3917                        mqd->cp_hqd_dequeue_request);
3918                 WREG32_SOC15(GC, 0, regCP_HQD_PQ_RPTR,
3919                        mqd->cp_hqd_pq_rptr);
3920                 WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_LO,
3921                        mqd->cp_hqd_pq_wptr_lo);
3922                 WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_HI,
3923                        mqd->cp_hqd_pq_wptr_hi);
3924         }
3925
3926         /* set the pointer to the MQD */
3927         WREG32_SOC15(GC, 0, regCP_MQD_BASE_ADDR,
3928                mqd->cp_mqd_base_addr_lo);
3929         WREG32_SOC15(GC, 0, regCP_MQD_BASE_ADDR_HI,
3930                mqd->cp_mqd_base_addr_hi);
3931
3932         /* set MQD vmid to 0 */
3933         WREG32_SOC15(GC, 0, regCP_MQD_CONTROL,
3934                mqd->cp_mqd_control);
3935
3936         /* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
3937         WREG32_SOC15(GC, 0, regCP_HQD_PQ_BASE,
3938                mqd->cp_hqd_pq_base_lo);
3939         WREG32_SOC15(GC, 0, regCP_HQD_PQ_BASE_HI,
3940                mqd->cp_hqd_pq_base_hi);
3941
3942         /* set up the HQD, this is similar to CP_RB0_CNTL */
3943         WREG32_SOC15(GC, 0, regCP_HQD_PQ_CONTROL,
3944                mqd->cp_hqd_pq_control);
3945
3946         /* set the wb address whether it's enabled or not */
3947         WREG32_SOC15(GC, 0, regCP_HQD_PQ_RPTR_REPORT_ADDR,
3948                 mqd->cp_hqd_pq_rptr_report_addr_lo);
3949         WREG32_SOC15(GC, 0, regCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
3950                 mqd->cp_hqd_pq_rptr_report_addr_hi);
3951
3952         /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
3953         WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_POLL_ADDR,
3954                mqd->cp_hqd_pq_wptr_poll_addr_lo);
3955         WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_POLL_ADDR_HI,
3956                mqd->cp_hqd_pq_wptr_poll_addr_hi);
3957
3958         /* enable the doorbell if requested */
3959         if (ring->use_doorbell) {
3960                 WREG32_SOC15(GC, 0, regCP_MEC_DOORBELL_RANGE_LOWER,
3961                         (adev->doorbell_index.kiq * 2) << 2);
3962                 WREG32_SOC15(GC, 0, regCP_MEC_DOORBELL_RANGE_UPPER,
3963                         (adev->doorbell_index.userqueue_end * 2) << 2);
3964         }
3965
3966         WREG32_SOC15(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL,
3967                mqd->cp_hqd_pq_doorbell_control);
3968
3969         /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3970         WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_LO,
3971                mqd->cp_hqd_pq_wptr_lo);
3972         WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_HI,
3973                mqd->cp_hqd_pq_wptr_hi);
3974
3975         /* set the vmid for the queue */
3976         WREG32_SOC15(GC, 0, regCP_HQD_VMID, mqd->cp_hqd_vmid);
3977
3978         WREG32_SOC15(GC, 0, regCP_HQD_PERSISTENT_STATE,
3979                mqd->cp_hqd_persistent_state);
3980
3981         /* activate the queue */
3982         WREG32_SOC15(GC, 0, regCP_HQD_ACTIVE,
3983                mqd->cp_hqd_active);
3984
3985         if (ring->use_doorbell)
3986                 WREG32_FIELD15_PREREG(GC, 0, CP_PQ_STATUS, DOORBELL_ENABLE, 1);
3987
3988         return 0;
3989 }
3990
3991 static int gfx_v11_0_kiq_init_queue(struct amdgpu_ring *ring)
3992 {
3993         struct amdgpu_device *adev = ring->adev;
3994         struct v11_compute_mqd *mqd = ring->mqd_ptr;
3995         int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS;
3996
3997         gfx_v11_0_kiq_setting(ring);
3998
3999         if (amdgpu_in_reset(adev)) { /* for GPU_RESET case */
4000                 /* reset MQD to a clean status */
4001                 if (adev->gfx.mec.mqd_backup[mqd_idx])
4002                         memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(*mqd));
4003
4004                 /* reset ring buffer */
4005                 ring->wptr = 0;
4006                 amdgpu_ring_clear_ring(ring);
4007
4008                 mutex_lock(&adev->srbm_mutex);
4009                 soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4010                 gfx_v11_0_kiq_init_register(ring);
4011                 soc21_grbm_select(adev, 0, 0, 0, 0);
4012                 mutex_unlock(&adev->srbm_mutex);
4013         } else {
4014                 memset((void *)mqd, 0, sizeof(*mqd));
4015                 if (amdgpu_sriov_vf(adev) && adev->in_suspend)
4016                         amdgpu_ring_clear_ring(ring);
4017                 mutex_lock(&adev->srbm_mutex);
4018                 soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4019                 amdgpu_ring_init_mqd(ring);
4020                 gfx_v11_0_kiq_init_register(ring);
4021                 soc21_grbm_select(adev, 0, 0, 0, 0);
4022                 mutex_unlock(&adev->srbm_mutex);
4023
4024                 if (adev->gfx.mec.mqd_backup[mqd_idx])
4025                         memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(*mqd));
4026         }
4027
4028         return 0;
4029 }
4030
4031 static int gfx_v11_0_kcq_init_queue(struct amdgpu_ring *ring)
4032 {
4033         struct amdgpu_device *adev = ring->adev;
4034         struct v11_compute_mqd *mqd = ring->mqd_ptr;
4035         int mqd_idx = ring - &adev->gfx.compute_ring[0];
4036
4037         if (!amdgpu_in_reset(adev) && !adev->in_suspend) {
4038                 memset((void *)mqd, 0, sizeof(*mqd));
4039                 mutex_lock(&adev->srbm_mutex);
4040                 soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4041                 amdgpu_ring_init_mqd(ring);
4042                 soc21_grbm_select(adev, 0, 0, 0, 0);
4043                 mutex_unlock(&adev->srbm_mutex);
4044
4045                 if (adev->gfx.mec.mqd_backup[mqd_idx])
4046                         memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(*mqd));
4047         } else if (amdgpu_in_reset(adev)) { /* for GPU_RESET case */
4048                 /* reset MQD to a clean status */
4049                 if (adev->gfx.mec.mqd_backup[mqd_idx])
4050                         memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(*mqd));
4051
4052                 /* reset ring buffer */
4053                 ring->wptr = 0;
4054                 atomic64_set((atomic64_t *)ring->wptr_cpu_addr, 0);
4055                 amdgpu_ring_clear_ring(ring);
4056         } else {
4057                 amdgpu_ring_clear_ring(ring);
4058         }
4059
4060         return 0;
4061 }
4062
4063 static int gfx_v11_0_kiq_resume(struct amdgpu_device *adev)
4064 {
4065         struct amdgpu_ring *ring;
4066         int r;
4067
4068         ring = &adev->gfx.kiq.ring;
4069
4070         r = amdgpu_bo_reserve(ring->mqd_obj, false);
4071         if (unlikely(r != 0))
4072                 return r;
4073
4074         r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
4075         if (unlikely(r != 0)) {
4076                 amdgpu_bo_unreserve(ring->mqd_obj);
4077                 return r;
4078         }
4079
4080         gfx_v11_0_kiq_init_queue(ring);
4081         amdgpu_bo_kunmap(ring->mqd_obj);
4082         ring->mqd_ptr = NULL;
4083         amdgpu_bo_unreserve(ring->mqd_obj);
4084         ring->sched.ready = true;
4085         return 0;
4086 }
4087
4088 static int gfx_v11_0_kcq_resume(struct amdgpu_device *adev)
4089 {
4090         struct amdgpu_ring *ring = NULL;
4091         int r = 0, i;
4092
4093         if (!amdgpu_async_gfx_ring)
4094                 gfx_v11_0_cp_compute_enable(adev, true);
4095
4096         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4097                 ring = &adev->gfx.compute_ring[i];
4098
4099                 r = amdgpu_bo_reserve(ring->mqd_obj, false);
4100                 if (unlikely(r != 0))
4101                         goto done;
4102                 r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
4103                 if (!r) {
4104                         r = gfx_v11_0_kcq_init_queue(ring);
4105                         amdgpu_bo_kunmap(ring->mqd_obj);
4106                         ring->mqd_ptr = NULL;
4107                 }
4108                 amdgpu_bo_unreserve(ring->mqd_obj);
4109                 if (r)
4110                         goto done;
4111         }
4112
4113         r = amdgpu_gfx_enable_kcq(adev);
4114 done:
4115         return r;
4116 }
4117
4118 static int gfx_v11_0_cp_resume(struct amdgpu_device *adev)
4119 {
4120         int r, i;
4121         struct amdgpu_ring *ring;
4122
4123         if (!(adev->flags & AMD_IS_APU))
4124                 gfx_v11_0_enable_gui_idle_interrupt(adev, false);
4125
4126         if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
4127                 /* legacy firmware loading */
4128                 r = gfx_v11_0_cp_gfx_load_microcode(adev);
4129                 if (r)
4130                         return r;
4131
4132                 if (adev->gfx.rs64_enable)
4133                         r = gfx_v11_0_cp_compute_load_microcode_rs64(adev);
4134                 else
4135                         r = gfx_v11_0_cp_compute_load_microcode(adev);
4136                 if (r)
4137                         return r;
4138         }
4139
4140         gfx_v11_0_cp_set_doorbell_range(adev);
4141
4142         if (amdgpu_async_gfx_ring) {
4143                 gfx_v11_0_cp_compute_enable(adev, true);
4144                 gfx_v11_0_cp_gfx_enable(adev, true);
4145         }
4146
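        /* bring up the KIQ (or the MES KIQ) before resuming the compute and gfx queues */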
4147         if (adev->enable_mes_kiq && adev->mes.kiq_hw_init)
4148                 r = amdgpu_mes_kiq_hw_init(adev);
4149         else
4150                 r = gfx_v11_0_kiq_resume(adev);
4151         if (r)
4152                 return r;
4153
4154         r = gfx_v11_0_kcq_resume(adev);
4155         if (r)
4156                 return r;
4157
4158         if (!amdgpu_async_gfx_ring) {
4159                 r = gfx_v11_0_cp_gfx_resume(adev);
4160                 if (r)
4161                         return r;
4162         } else {
4163                 r = gfx_v11_0_cp_async_gfx_ring_resume(adev);
4164                 if (r)
4165                         return r;
4166         }
4167
4168         for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
4169                 ring = &adev->gfx.gfx_ring[i];
4170                 r = amdgpu_ring_test_helper(ring);
4171                 if (r)
4172                         return r;
4173         }
4174
4175         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4176                 ring = &adev->gfx.compute_ring[i];
4177                 r = amdgpu_ring_test_helper(ring);
4178                 if (r)
4179                         return r;
4180         }
4181
4182         return 0;
4183 }
4184
4185 static void gfx_v11_0_cp_enable(struct amdgpu_device *adev, bool enable)
4186 {
4187         gfx_v11_0_cp_gfx_enable(adev, enable);
4188         gfx_v11_0_cp_compute_enable(adev, enable);
4189 }
4190
4191 static int gfx_v11_0_gfxhub_enable(struct amdgpu_device *adev)
4192 {
4193         int r;
4194         bool value;
4195
4196         r = adev->gfxhub.funcs->gart_enable(adev);
4197         if (r)
4198                 return r;
4199
4200         adev->hdp.funcs->flush_hdp(adev, NULL);
4201
4202         value = (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_ALWAYS) ?
4203                 false : true;
4204
4205         adev->gfxhub.funcs->set_fault_enable_default(adev, value);
4206         amdgpu_gmc_flush_gpu_tlb(adev, 0, AMDGPU_GFXHUB_0, 0);
4207
4208         return 0;
4209 }
4210
4211 static void gfx_v11_0_select_cp_fw_arch(struct amdgpu_device *adev)
4212 {
4213         u32 tmp;
4214
4215         /* select RS64 */
4216         if (adev->gfx.rs64_enable) {
4217                 tmp = RREG32_SOC15(GC, 0, regCP_GFX_CNTL);
4218                 tmp = REG_SET_FIELD(tmp, CP_GFX_CNTL, ENGINE_SEL, 1);
4219                 WREG32_SOC15(GC, 0, regCP_GFX_CNTL, tmp);
4220
4221                 tmp = RREG32_SOC15(GC, 0, regCP_MEC_ISA_CNTL);
4222                 tmp = REG_SET_FIELD(tmp, CP_MEC_ISA_CNTL, ISA_MODE, 1);
4223                 WREG32_SOC15(GC, 0, regCP_MEC_ISA_CNTL, tmp);
4224         }
4225
4226         if (amdgpu_emu_mode == 1)
4227                 msleep(100);
4228 }
4229
4230 static int get_gb_addr_config(struct amdgpu_device *adev)
4231 {
4232         u32 gb_addr_config;
4233
4234         gb_addr_config = RREG32_SOC15(GC, 0, regGB_ADDR_CONFIG);
4235         if (gb_addr_config == 0)
4236                 return -EINVAL;
4237
4238         adev->gfx.config.gb_addr_config_fields.num_pkrs =
4239                 1 << REG_GET_FIELD(gb_addr_config, GB_ADDR_CONFIG, NUM_PKRS);
4240
4241         adev->gfx.config.gb_addr_config = gb_addr_config;
4242
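        /* decode the remaining GB_ADDR_CONFIG fields into the gfx config */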
4243         adev->gfx.config.gb_addr_config_fields.num_pipes = 1 <<
4244                         REG_GET_FIELD(adev->gfx.config.gb_addr_config,
4245                                       GB_ADDR_CONFIG, NUM_PIPES);
4246
4247         adev->gfx.config.max_tile_pipes =
4248                 adev->gfx.config.gb_addr_config_fields.num_pipes;
4249
4250         adev->gfx.config.gb_addr_config_fields.max_compress_frags = 1 <<
4251                         REG_GET_FIELD(adev->gfx.config.gb_addr_config,
4252                                       GB_ADDR_CONFIG, MAX_COMPRESSED_FRAGS);
4253         adev->gfx.config.gb_addr_config_fields.num_rb_per_se = 1 <<
4254                         REG_GET_FIELD(adev->gfx.config.gb_addr_config,
4255                                       GB_ADDR_CONFIG, NUM_RB_PER_SE);
4256         adev->gfx.config.gb_addr_config_fields.num_se = 1 <<
4257                         REG_GET_FIELD(adev->gfx.config.gb_addr_config,
4258                                       GB_ADDR_CONFIG, NUM_SHADER_ENGINES);
4259         adev->gfx.config.gb_addr_config_fields.pipe_interleave_size = 1 << (8 +
4260                         REG_GET_FIELD(adev->gfx.config.gb_addr_config,
4261                                       GB_ADDR_CONFIG, PIPE_INTERLEAVE_SIZE));
4262
4263         return 0;
4264 }
4265
4266 static void gfx_v11_0_disable_gpa_mode(struct amdgpu_device *adev)
4267 {
4268         uint32_t data;
4269
4270         data = RREG32_SOC15(GC, 0, regCPC_PSP_DEBUG);
4271         data |= CPC_PSP_DEBUG__GPA_OVERRIDE_MASK;
4272         WREG32_SOC15(GC, 0, regCPC_PSP_DEBUG, data);
4273
4274         data = RREG32_SOC15(GC, 0, regCPG_PSP_DEBUG);
4275         data |= CPG_PSP_DEBUG__GPA_OVERRIDE_MASK;
4276         WREG32_SOC15(GC, 0, regCPG_PSP_DEBUG, data);
4277 }
4278
4279 static int gfx_v11_0_hw_init(void *handle)
4280 {
4281         int r;
4282         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4283
4284         if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) {
4285                 if (adev->gfx.imu.funcs) {
4286                         /* RLC autoload sequence 1: Program rlc ram */
4287                         if (adev->gfx.imu.funcs->program_rlc_ram)
4288                                 adev->gfx.imu.funcs->program_rlc_ram(adev);
4289                 }
4290                 /* rlc autoload firmware */
4291                 r = gfx_v11_0_rlc_backdoor_autoload_enable(adev);
4292                 if (r)
4293                         return r;
4294         } else {
4295                 if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
4296                         if (adev->gfx.imu.funcs && (amdgpu_dpm > 0)) {
4297                                 if (adev->gfx.imu.funcs->load_microcode)
4298                                         adev->gfx.imu.funcs->load_microcode(adev);
4299                                 if (adev->gfx.imu.funcs->setup_imu)
4300                                         adev->gfx.imu.funcs->setup_imu(adev);
4301                                 if (adev->gfx.imu.funcs->start_imu)
4302                                         adev->gfx.imu.funcs->start_imu(adev);
4303                         }
4304
4305                         /* disable gpa mode in backdoor loading */
4306                         gfx_v11_0_disable_gpa_mode(adev);
4307                 }
4308         }
4309
4310         if ((adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) ||
4311             (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP)) {
4312                 r = gfx_v11_0_wait_for_rlc_autoload_complete(adev);
4313                 if (r) {
4314                         dev_err(adev->dev, "(%d) failed to wait for RLC autoload to complete\n", r);
4315                         return r;
4316                 }
4317         }
4318
4319         adev->gfx.is_poweron = true;
4320
4321         if (get_gb_addr_config(adev))
4322                 DRM_WARN("Invalid gb_addr_config!\n");
4323
4324         if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP &&
4325             adev->gfx.rs64_enable)
4326                 gfx_v11_0_config_gfx_rs64(adev);
4327
4328         r = gfx_v11_0_gfxhub_enable(adev);
4329         if (r)
4330                 return r;
4331
4332         if (!amdgpu_emu_mode)
4333                 gfx_v11_0_init_golden_registers(adev);
4334
4335         if ((adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) ||
4336             (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO && amdgpu_dpm == 1)) {
4337                 /*
4338                  * For gfx 11, RLC firmware loading relies on the SMU firmware
4339                  * being loaded first, so for the direct loading type the SMC
4340                  * ucode has to be loaded here before the RLC.
4341                  */
4342                 if (!(adev->flags & AMD_IS_APU)) {
4343                         r = amdgpu_pm_load_smu_firmware(adev, NULL);
4344                         if (r)
4345                                 return r;
4346                 }
4347         }
4348
4349         gfx_v11_0_constants_init(adev);
4350
4351         if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP)
4352                 gfx_v11_0_select_cp_fw_arch(adev);
4353
4354         if (adev->nbio.funcs->gc_doorbell_init)
4355                 adev->nbio.funcs->gc_doorbell_init(adev);
4356
4357         r = gfx_v11_0_rlc_resume(adev);
4358         if (r)
4359                 return r;
4360
4361         /*
4362          * Golden register init and RLC resume may have overridden some
4363          * registers, so reconfigure them here.
4364          */
4365         gfx_v11_0_tcp_harvest(adev);
4366
4367         r = gfx_v11_0_cp_resume(adev);
4368         if (r)
4369                 return r;
4370
4371         return r;
4372 }
4373
4374 #ifndef BRING_UP_DEBUG
4375 static int gfx_v11_0_kiq_disable_kgq(struct amdgpu_device *adev)
4376 {
4377         struct amdgpu_kiq *kiq = &adev->gfx.kiq;
4378         struct amdgpu_ring *kiq_ring = &kiq->ring;
4379         int i, r = 0;
4380
4381         if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues)
4382                 return -EINVAL;
4383
4384         if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size *
4385                                         adev->gfx.num_gfx_rings))
4386                 return -ENOMEM;
4387
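        /* queue a KIQ unmap_queues (preempt) packet for each gfx ring */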
4388         for (i = 0; i < adev->gfx.num_gfx_rings; i++)
4389                 kiq->pmf->kiq_unmap_queues(kiq_ring, &adev->gfx.gfx_ring[i],
4390                                            PREEMPT_QUEUES, 0, 0);
4391
4392         if (adev->gfx.kiq.ring.sched.ready)
4393                 r = amdgpu_ring_test_helper(kiq_ring);
4394
4395         return r;
4396 }
4397 #endif
4398
4399 static int gfx_v11_0_hw_fini(void *handle)
4400 {
4401         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4402         int r;
4403
4404         amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
4405         amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
4406
4407         if (!adev->no_hw_access) {
4408 #ifndef BRING_UP_DEBUG
4409                 if (amdgpu_async_gfx_ring) {
4410                         r = gfx_v11_0_kiq_disable_kgq(adev);
4411                         if (r)
4412                                 DRM_ERROR("KGQ disable failed\n");
4413                 }
4414 #endif
4415                 if (amdgpu_gfx_disable_kcq(adev))
4416                         DRM_ERROR("KCQ disable failed\n");
4417
4418                 amdgpu_mes_kiq_hw_fini(adev);
4419         }
4420
4421         if (amdgpu_sriov_vf(adev))
4422                 /* Skip the steps that disable CPG and clear the KIQ position,
4423                  * so that the CP can perform IDLE-SAVE during the switch. Those
4424                  * steps are needed to avoid a DMAR error on gfx9, but the error
4425                  * is not reproduced on gfx11.
4426                  */
4427                 return 0;
4428
4429         gfx_v11_0_cp_enable(adev, false);
4430         gfx_v11_0_enable_gui_idle_interrupt(adev, false);
4431
4432         adev->gfxhub.funcs->gart_disable(adev);
4433
4434         adev->gfx.is_poweron = false;
4435
4436         return 0;
4437 }
4438
4439 static int gfx_v11_0_suspend(void *handle)
4440 {
4441         return gfx_v11_0_hw_fini(handle);
4442 }
4443
4444 static int gfx_v11_0_resume(void *handle)
4445 {
4446         return gfx_v11_0_hw_init(handle);
4447 }
4448
4449 static bool gfx_v11_0_is_idle(void *handle)
4450 {
4451         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4452
4453         if (REG_GET_FIELD(RREG32_SOC15(GC, 0, regGRBM_STATUS),
4454                                 GRBM_STATUS, GUI_ACTIVE))
4455                 return false;
4456         else
4457                 return true;
4458 }
4459
4460 static int gfx_v11_0_wait_for_idle(void *handle)
4461 {
4462         unsigned i;
4463         u32 tmp;
4464         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4465
4466         for (i = 0; i < adev->usec_timeout; i++) {
4467                 /* read MC_STATUS */
4468                 tmp = RREG32_SOC15(GC, 0, regGRBM_STATUS) &
4469                         GRBM_STATUS__GUI_ACTIVE_MASK;
4470
4471                 if (!REG_GET_FIELD(tmp, GRBM_STATUS, GUI_ACTIVE))
4472                         return 0;
4473                 udelay(1);
4474         }
4475         return -ETIMEDOUT;
4476 }
4477
4478 static int gfx_v11_0_soft_reset(void *handle)
4479 {
4480         u32 grbm_soft_reset = 0;
4481         u32 tmp;
4482         int i, j, k;
4483         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4484
4485         tmp = RREG32_SOC15(GC, 0, regCP_INT_CNTL);
4486         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, CMP_BUSY_INT_ENABLE, 0);
4487         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, CNTX_BUSY_INT_ENABLE, 0);
4488         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, CNTX_EMPTY_INT_ENABLE, 0);
4489         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, GFX_IDLE_INT_ENABLE, 0);
4490         WREG32_SOC15(GC, 0, regCP_INT_CNTL, tmp);
4491
4492         gfx_v11_0_set_safe_mode(adev);
4493
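        /* select each compute queue and request a dequeue plus a SPI queue reset */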
4494         for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
4495                 for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
4496                         for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
4497                                 tmp = RREG32_SOC15(GC, 0, regGRBM_GFX_CNTL);
4498                                 tmp = REG_SET_FIELD(tmp, GRBM_GFX_CNTL, MEID, i);
4499                                 tmp = REG_SET_FIELD(tmp, GRBM_GFX_CNTL, QUEUEID, j);
4500                                 tmp = REG_SET_FIELD(tmp, GRBM_GFX_CNTL, PIPEID, k);
4501                                 WREG32_SOC15(GC, 0, regGRBM_GFX_CNTL, tmp);
4502
4503                                 WREG32_SOC15(GC, 0, regCP_HQD_DEQUEUE_REQUEST, 0x2);
4504                                 WREG32_SOC15(GC, 0, regSPI_COMPUTE_QUEUE_RESET, 0x1);
4505                         }
4506                 }
4507         }
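        /* select each gfx queue and request a dequeue */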
4508         for (i = 0; i < adev->gfx.me.num_me; ++i) {
4509                 for (j = 0; j < adev->gfx.me.num_queue_per_pipe; j++) {
4510                         for (k = 0; k < adev->gfx.me.num_pipe_per_me; k++) {
4511                                 tmp = RREG32_SOC15(GC, 0, regGRBM_GFX_CNTL);
4512                                 tmp = REG_SET_FIELD(tmp, GRBM_GFX_CNTL, MEID, i);
4513                                 tmp = REG_SET_FIELD(tmp, GRBM_GFX_CNTL, QUEUEID, j);
4514                                 tmp = REG_SET_FIELD(tmp, GRBM_GFX_CNTL, PIPEID, k);
4515                                 WREG32_SOC15(GC, 0, regGRBM_GFX_CNTL, tmp);
4516
4517                                 WREG32_SOC15(GC, 0, regCP_GFX_HQD_DEQUEUE_REQUEST, 0x1);
4518                         }
4519                 }
4520         }
4521
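        /* reset the queues of every VMID except VMID 0 */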
4522         WREG32_SOC15(GC, 0, regCP_VMID_RESET, 0xfffffffe);
4523
4524         /* Read the CP_VMID_RESET register back three times to give
4525          * GFX_HQD_ACTIVE sufficient time to reach 0. */
4526         RREG32_SOC15(GC, 0, regCP_VMID_RESET);
4527         RREG32_SOC15(GC, 0, regCP_VMID_RESET);
4528         RREG32_SOC15(GC, 0, regCP_VMID_RESET);
4529
4530         for (i = 0; i < adev->usec_timeout; i++) {
4531                 if (!RREG32_SOC15(GC, 0, regCP_HQD_ACTIVE) &&
4532                     !RREG32_SOC15(GC, 0, regCP_GFX_HQD_ACTIVE))
4533                         break;
4534                 udelay(1);
4535         }
4536         if (i >= adev->usec_timeout) {
4537                 dev_err(adev->dev, "failed to wait for all pipes to become idle\n");
4538                 return -EINVAL;
4539         }
4540
4541         /**********  trigger soft reset  ***********/
4542         grbm_soft_reset = RREG32_SOC15(GC, 0, regGRBM_SOFT_RESET);
4543         grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
4544                                         SOFT_RESET_CP, 1);
4545         grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
4546                                         SOFT_RESET_GFX, 1);
4547         grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
4548                                         SOFT_RESET_CPF, 1);
4549         grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
4550                                         SOFT_RESET_CPC, 1);
4551         grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
4552                                         SOFT_RESET_CPG, 1);
4553         WREG32_SOC15(GC, 0, regGRBM_SOFT_RESET, grbm_soft_reset);
4554         /**********  exit soft reset  ***********/
4555         grbm_soft_reset = RREG32_SOC15(GC, 0, regGRBM_SOFT_RESET);
4556         grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
4557                                         SOFT_RESET_CP, 0);
4558         grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
4559                                         SOFT_RESET_GFX, 0);
4560         grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
4561                                         SOFT_RESET_CPF, 0);
4562         grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
4563                                         SOFT_RESET_CPC, 0);
4564         grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
4565                                         SOFT_RESET_CPG, 0);
4566         WREG32_SOC15(GC, 0, regGRBM_SOFT_RESET, grbm_soft_reset);
4567
4568         tmp = RREG32_SOC15(GC, 0, regCP_SOFT_RESET_CNTL);
4569         tmp = REG_SET_FIELD(tmp, CP_SOFT_RESET_CNTL, CMP_HQD_REG_RESET, 0x1);
4570         WREG32_SOC15(GC, 0, regCP_SOFT_RESET_CNTL, tmp);
4571
4572         WREG32_SOC15(GC, 0, regCP_ME_CNTL, 0x0);
4573         WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, 0x0);
4574
4575         for (i = 0; i < adev->usec_timeout; i++) {
4576                 if (!RREG32_SOC15(GC, 0, regCP_VMID_RESET))
4577                         break;
4578                 udelay(1);
4579         }
4580         if (i >= adev->usec_timeout) {
4581                 dev_err(adev->dev, "failed to wait for CP_VMID_RESET to clear\n");
4582                 return -EINVAL;
4583         }
4584
4585         tmp = RREG32_SOC15(GC, 0, regCP_INT_CNTL);
4586         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, CMP_BUSY_INT_ENABLE, 1);
4587         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, CNTX_BUSY_INT_ENABLE, 1);
4588         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, CNTX_EMPTY_INT_ENABLE, 1);
4589         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, GFX_IDLE_INT_ENABLE, 1);
4590         WREG32_SOC15(GC, 0, regCP_INT_CNTL, tmp);
4591
4592         gfx_v11_0_unset_safe_mode(adev);
4593
4594         return gfx_v11_0_cp_resume(adev);
4595 }
4596
4597 static bool gfx_v11_0_check_soft_reset(void *handle)
4598 {
4599         int i, r;
4600         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4601         struct amdgpu_ring *ring;
4602         long tmo = msecs_to_jiffies(1000);
4603
4604         for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
4605                 ring = &adev->gfx.gfx_ring[i];
4606                 r = amdgpu_ring_test_ib(ring, tmo);
4607                 if (r)
4608                         return true;
4609         }
4610
4611         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4612                 ring = &adev->gfx.compute_ring[i];
4613                 r = amdgpu_ring_test_ib(ring, tmo);
4614                 if (r)
4615                         return true;
4616         }
4617
4618         return false;
4619 }
4620
4621 static uint64_t gfx_v11_0_get_gpu_clock_counter(struct amdgpu_device *adev)
4622 {
4623         uint64_t clock;
4624
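        /* gfxoff is disabled around the read of the golden TSC counter pair */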
4625         amdgpu_gfx_off_ctrl(adev, false);
4626         mutex_lock(&adev->gfx.gpu_clock_mutex);
4627         clock = (uint64_t)RREG32_SOC15(SMUIO, 0, regGOLDEN_TSC_COUNT_LOWER) |
4628                 ((uint64_t)RREG32_SOC15(SMUIO, 0, regGOLDEN_TSC_COUNT_UPPER) << 32ULL);
4629         mutex_unlock(&adev->gfx.gpu_clock_mutex);
4630         amdgpu_gfx_off_ctrl(adev, true);
4631         return clock;
4632 }
4633
4634 static void gfx_v11_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
4635                                            uint32_t vmid,
4636                                            uint32_t gds_base, uint32_t gds_size,
4637                                            uint32_t gws_base, uint32_t gws_size,
4638                                            uint32_t oa_base, uint32_t oa_size)
4639 {
4640         struct amdgpu_device *adev = ring->adev;
4641
4642         /* GDS Base */
4643         gfx_v11_0_write_data_to_reg(ring, 0, false,
4644                                     SOC15_REG_OFFSET(GC, 0, regGDS_VMID0_BASE) + 2 * vmid,
4645                                     gds_base);
4646
4647         /* GDS Size */
4648         gfx_v11_0_write_data_to_reg(ring, 0, false,
4649                                     SOC15_REG_OFFSET(GC, 0, regGDS_VMID0_SIZE) + 2 * vmid,
4650                                     gds_size);
4651
4652         /* GWS */
4653         gfx_v11_0_write_data_to_reg(ring, 0, false,
4654                                     SOC15_REG_OFFSET(GC, 0, regGDS_GWS_VMID0) + vmid,
4655                                     gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
4656
4657         /* OA */
4658         gfx_v11_0_write_data_to_reg(ring, 0, false,
4659                                     SOC15_REG_OFFSET(GC, 0, regGDS_OA_VMID0) + vmid,
4660                                     (1 << (oa_size + oa_base)) - (1 << oa_base));
4661 }
4662
4663 static int gfx_v11_0_early_init(void *handle)
4664 {
4665         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4666
4667         adev->gfx.funcs = &gfx_v11_0_gfx_funcs;
4668
4669         adev->gfx.num_gfx_rings = GFX11_NUM_GFX_RINGS;
4670         adev->gfx.num_compute_rings = min(amdgpu_gfx_get_num_kcq(adev),
4671                                           AMDGPU_MAX_COMPUTE_RINGS);
4672
4673         gfx_v11_0_set_kiq_pm4_funcs(adev);
4674         gfx_v11_0_set_ring_funcs(adev);
4675         gfx_v11_0_set_irq_funcs(adev);
4676         gfx_v11_0_set_gds_init(adev);
4677         gfx_v11_0_set_rlc_funcs(adev);
4678         gfx_v11_0_set_mqd_funcs(adev);
4679         gfx_v11_0_set_imu_funcs(adev);
4680
4681         gfx_v11_0_init_rlcg_reg_access_ctrl(adev);
4682
4683         return 0;
4684 }
4685
4686 static int gfx_v11_0_ras_late_init(void *handle)
4687 {
4688         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4689         struct ras_common_if *gfx_common_if;
4690         int ret;
4691
4692         gfx_common_if = kzalloc(sizeof(struct ras_common_if), GFP_KERNEL);
4693         if (!gfx_common_if)
4694                 return -ENOMEM;
4695
4696         gfx_common_if->block = AMDGPU_RAS_BLOCK__GFX;
4697
4698         ret = amdgpu_ras_feature_enable(adev, gfx_common_if, true);
4699         if (ret)
4700                 dev_warn(adev->dev, "Failed to enable gfx11 ras feature\n");
4701
4702         kfree(gfx_common_if);
4703         return 0;
4704 }
4705
4706 static int gfx_v11_0_late_init(void *handle)
4707 {
4708         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4709         int r;
4710
4711         r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
4712         if (r)
4713                 return r;
4714
4715         r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
4716         if (r)
4717                 return r;
4718
4719         if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(11, 0, 3)) {
4720                 r = gfx_v11_0_ras_late_init(handle);
4721                 if (r)
4722                         return r;
4723         }
4724
4725         return 0;
4726 }
4727
4728 static bool gfx_v11_0_is_rlc_enabled(struct amdgpu_device *adev)
4729 {
4730         uint32_t rlc_cntl;
4731
4732         /* if RLC is not enabled, do nothing */
4733         rlc_cntl = RREG32_SOC15(GC, 0, regRLC_CNTL);
4734         return (REG_GET_FIELD(rlc_cntl, RLC_CNTL, RLC_ENABLE_F32)) ? true : false;
4735 }
4736
4737 static void gfx_v11_0_set_safe_mode(struct amdgpu_device *adev)
4738 {
4739         uint32_t data;
4740         unsigned i;
4741
4742         data = RLC_SAFE_MODE__CMD_MASK;
4743         data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
4744
4745         WREG32_SOC15(GC, 0, regRLC_SAFE_MODE, data);
4746
4747         /* wait for RLC_SAFE_MODE */
4748         for (i = 0; i < adev->usec_timeout; i++) {
4749                 if (!REG_GET_FIELD(RREG32_SOC15(GC, 0, regRLC_SAFE_MODE),
4750                                    RLC_SAFE_MODE, CMD))
4751                         break;
4752                 udelay(1);
4753         }
4754 }
4755
4756 static void gfx_v11_0_unset_safe_mode(struct amdgpu_device *adev)
4757 {
4758         WREG32_SOC15(GC, 0, regRLC_SAFE_MODE, RLC_SAFE_MODE__CMD_MASK);
4759 }
4760
4761 static void gfx_v11_0_update_perf_clk(struct amdgpu_device *adev,
4762                                       bool enable)
4763 {
4764         uint32_t def, data;
4765
4766         if (!(adev->cg_flags & AMD_CG_SUPPORT_GFX_PERF_CLK))
4767                 return;
4768
4769         def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE);
4770
4771         if (enable)
4772                 data &= ~RLC_CGTT_MGCG_OVERRIDE__PERFMON_CLOCK_STATE_MASK;
4773         else
4774                 data |= RLC_CGTT_MGCG_OVERRIDE__PERFMON_CLOCK_STATE_MASK;
4775
4776         if (def != data)
4777                 WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data);
4778 }
4779
4780 static void gfx_v11_0_update_sram_fgcg(struct amdgpu_device *adev,
4781                                        bool enable)
4782 {
4783         uint32_t def, data;
4784
4785         if (!(adev->cg_flags & AMD_CG_SUPPORT_GFX_FGCG))
4786                 return;
4787
4788         def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE);
4789
4790         if (enable)
4791                 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_FGCG_OVERRIDE_MASK;
4792         else
4793                 data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_FGCG_OVERRIDE_MASK;
4794
4795         if (def != data)
4796                 WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data);
4797 }
4798
4799 static void gfx_v11_0_update_repeater_fgcg(struct amdgpu_device *adev,
4800                                            bool enable)
4801 {
4802         uint32_t def, data;
4803
4804         if (!(adev->cg_flags & AMD_CG_SUPPORT_REPEATER_FGCG))
4805                 return;
4806
4807         def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE);
4808
4809         if (enable)
4810                 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_REPEATER_FGCG_OVERRIDE_MASK;
4811         else
4812                 data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_REPEATER_FGCG_OVERRIDE_MASK;
4813
4814         if (def != data)
4815                 WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data);
4816 }
4817
4818 static void gfx_v11_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
4819                                                        bool enable)
4820 {
4821         uint32_t data, def;
4822
4823         if (!(adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)))
4824                 return;
4825
4826         /* It is disabled by HW by default */
4827         if (enable) {
4828                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
4829                         /* 1 - RLC_CGTT_MGCG_OVERRIDE */
4830                         def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE);
4831
4832                         data &= ~(RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
4833                                   RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK |
4834                                   RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK);
4835
4836                         if (def != data)
4837                                 WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data);
4838                 }
4839         } else {
4840                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
4841                         def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE);
4842
4843                         data |= (RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK |
4844                                  RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
4845                                  RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK);
4846
4847                         if (def != data)
4848                                 WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data);
4849                 }
4850         }
4851 }
4852
4853 static void gfx_v11_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
4854                                                        bool enable)
4855 {
4856         uint32_t def, data;
4857
4858         if (!(adev->cg_flags &
4859               (AMD_CG_SUPPORT_GFX_CGCG |
4860               AMD_CG_SUPPORT_GFX_CGLS |
4861               AMD_CG_SUPPORT_GFX_3D_CGCG |
4862               AMD_CG_SUPPORT_GFX_3D_CGLS)))
4863                 return;
4864
4865         if (enable) {
4866                 def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE);
4867
4868                 /* unset CGCG override */
4869                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)
4870                         data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGCG_OVERRIDE_MASK;
4871                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
4872                         data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
4873                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG ||
4874                     adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS)
4875                         data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_GFX3D_CG_OVERRIDE_MASK;
4876
4877                 /* update CGCG override bits */
4878                 if (def != data)
4879                         WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data);
4880
4881                 /* enable cgcg FSM(0x0000363F) */
4882                 def = data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL);
4883
4884                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
4885                         data &= ~RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD_MASK;
4886                         data |= (0x36 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
4887                                  RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
4888                 }
4889
4890                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
4891                         data &= ~RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY_MASK;
4892                         data |= (0x000F << RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
4893                                  RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
4894                 }
4895
4896                 if (def != data)
4897                         WREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL, data);
4898
4899                 /* Program RLC_CGCG_CGLS_CTRL_3D */
4900                 def = data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL_3D);
4901
4902                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG) {
4903                         data &= ~RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD_MASK;
4904                         data |= (0x36 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
4905                                  RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK;
4906                 }
4907
4908                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS) {
4909                         data &= ~RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY_MASK;
4910                         data |= (0xf << RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
4911                                  RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK;
4912                 }
4913
4914                 if (def != data)
4915                         WREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL_3D, data);
4916
4917                 /* set IDLE_POLL_COUNT(0x00900100) */
4918                 def = data = RREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_CNTL);
4919
4920                 data &= ~(CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY_MASK | CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT_MASK);
4921                 data |= (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
4922                         (0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
4923
4924                 if (def != data)
4925                         WREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_CNTL, data);
4926
4927                 data = RREG32_SOC15(GC, 0, regCP_INT_CNTL);
4928                 data = REG_SET_FIELD(data, CP_INT_CNTL, CNTX_BUSY_INT_ENABLE, 1);
4929                 data = REG_SET_FIELD(data, CP_INT_CNTL, CNTX_EMPTY_INT_ENABLE, 1);
4930                 data = REG_SET_FIELD(data, CP_INT_CNTL, CMP_BUSY_INT_ENABLE, 1);
4931                 data = REG_SET_FIELD(data, CP_INT_CNTL, GFX_IDLE_INT_ENABLE, 1);
4932                 WREG32_SOC15(GC, 0, regCP_INT_CNTL, data);
4933
4934                 data = RREG32_SOC15(GC, 0, regSDMA0_RLC_CGCG_CTRL);
4935                 data = REG_SET_FIELD(data, SDMA0_RLC_CGCG_CTRL, CGCG_INT_ENABLE, 1);
4936                 WREG32_SOC15(GC, 0, regSDMA0_RLC_CGCG_CTRL, data);
4937
4938                 /* Some ASICs only have one SDMA instance, so there is no need to configure SDMA1 */
4939                 if (adev->sdma.num_instances > 1) {
4940                         data = RREG32_SOC15(GC, 0, regSDMA1_RLC_CGCG_CTRL);
4941                         data = REG_SET_FIELD(data, SDMA1_RLC_CGCG_CTRL, CGCG_INT_ENABLE, 1);
4942                         WREG32_SOC15(GC, 0, regSDMA1_RLC_CGCG_CTRL, data);
4943                 }
4944         } else {
4945                 /* Program RLC_CGCG_CGLS_CTRL */
4946                 def = data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL);
4947
4948                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)
4949                         data &= ~RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
4950
4951                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
4952                         data &= ~RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
4953
4954                 if (def != data)
4955                         WREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL, data);
4956
4957                 /* Program RLC_CGCG_CGLS_CTRL_3D */
4958                 def = data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL_3D);
4959
4960                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG)
4961                         data &= ~RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK;
4962                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS)
4963                         data &= ~RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK;
4964
4965                 if (def != data)
4966                         WREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL_3D, data);
4967
4968                 data = RREG32_SOC15(GC, 0, regSDMA0_RLC_CGCG_CTRL);
4969                 data &= ~SDMA0_RLC_CGCG_CTRL__CGCG_INT_ENABLE_MASK;
4970                 WREG32_SOC15(GC, 0, regSDMA0_RLC_CGCG_CTRL, data);
4971
4972                 /* Some ASICs only have one SDMA instance, so there is no need to configure SDMA1 */
4973                 if (adev->sdma.num_instances > 1) {
4974                         data = RREG32_SOC15(GC, 0, regSDMA1_RLC_CGCG_CTRL);
4975                         data &= ~SDMA1_RLC_CGCG_CTRL__CGCG_INT_ENABLE_MASK;
4976                         WREG32_SOC15(GC, 0, regSDMA1_RLC_CGCG_CTRL, data);
4977                 }
4978         }
4979 }
4980
4981 static int gfx_v11_0_update_gfx_clock_gating(struct amdgpu_device *adev,
4982                                             bool enable)
4983 {
4984         amdgpu_gfx_rlc_enter_safe_mode(adev);
4985
4986         gfx_v11_0_update_coarse_grain_clock_gating(adev, enable);
4987
4988         gfx_v11_0_update_medium_grain_clock_gating(adev, enable);
4989
4990         gfx_v11_0_update_repeater_fgcg(adev, enable);
4991
4992         gfx_v11_0_update_sram_fgcg(adev, enable);
4993
4994         gfx_v11_0_update_perf_clk(adev, enable);
4995
4996         if (adev->cg_flags &
4997             (AMD_CG_SUPPORT_GFX_MGCG |
4998              AMD_CG_SUPPORT_GFX_CGLS |
4999              AMD_CG_SUPPORT_GFX_CGCG |
5000              AMD_CG_SUPPORT_GFX_3D_CGCG |
5001              AMD_CG_SUPPORT_GFX_3D_CGLS))
5002                 gfx_v11_0_enable_gui_idle_interrupt(adev, enable);
5003
5004         amdgpu_gfx_rlc_exit_safe_mode(adev);
5005
5006         return 0;
5007 }
5008
5009 static void gfx_v11_0_update_spm_vmid(struct amdgpu_device *adev, unsigned vmid)
5010 {
5011         u32 reg, data;
5012
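        /* disable gfxoff while programming the SPM VMID in RLC_SPM_MC_CNTL */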
5013         amdgpu_gfx_off_ctrl(adev, false);
5014
5015         reg = SOC15_REG_OFFSET(GC, 0, regRLC_SPM_MC_CNTL);
5016         if (amdgpu_sriov_is_pp_one_vf(adev))
5017                 data = RREG32_NO_KIQ(reg);
5018         else
5019                 data = RREG32(reg);
5020
5021         data &= ~RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK;
5022         data |= (vmid & RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK) << RLC_SPM_MC_CNTL__RLC_SPM_VMID__SHIFT;
5023
5024         if (amdgpu_sriov_is_pp_one_vf(adev))
5025                 WREG32_SOC15_NO_KIQ(GC, 0, regRLC_SPM_MC_CNTL, data);
5026         else
5027                 WREG32_SOC15(GC, 0, regRLC_SPM_MC_CNTL, data);
5028
5029         amdgpu_gfx_off_ctrl(adev, true);
5030 }
5031
5032 static const struct amdgpu_rlc_funcs gfx_v11_0_rlc_funcs = {
5033         .is_rlc_enabled = gfx_v11_0_is_rlc_enabled,
5034         .set_safe_mode = gfx_v11_0_set_safe_mode,
5035         .unset_safe_mode = gfx_v11_0_unset_safe_mode,
5036         .init = gfx_v11_0_rlc_init,
5037         .get_csb_size = gfx_v11_0_get_csb_size,
5038         .get_csb_buffer = gfx_v11_0_get_csb_buffer,
5039         .resume = gfx_v11_0_rlc_resume,
5040         .stop = gfx_v11_0_rlc_stop,
5041         .reset = gfx_v11_0_rlc_reset,
5042         .start = gfx_v11_0_rlc_start,
5043         .update_spm_vmid = gfx_v11_0_update_spm_vmid,
5044 };
5045
5046 static void gfx_v11_cntl_power_gating(struct amdgpu_device *adev, bool enable)
5047 {
5048         u32 data = RREG32_SOC15(GC, 0, regRLC_PG_CNTL);
5049
5050         if (enable && (adev->pg_flags & AMD_PG_SUPPORT_GFX_PG))
5051                 data |= RLC_PG_CNTL__GFX_POWER_GATING_ENABLE_MASK;
5052         else
5053                 data &= ~RLC_PG_CNTL__GFX_POWER_GATING_ENABLE_MASK;
5054
5055         WREG32_SOC15(GC, 0, regRLC_PG_CNTL, data);
5056
5057         /* Program RLC_PG_DELAY_3 for CGPG hysteresis */
5058         if (enable && (adev->pg_flags & AMD_PG_SUPPORT_GFX_PG)) {
5059                 switch (adev->ip_versions[GC_HWIP][0]) {
5060                 case IP_VERSION(11, 0, 1):
5061                 case IP_VERSION(11, 0, 4):
5062                         WREG32_SOC15(GC, 0, regRLC_PG_DELAY_3, RLC_PG_DELAY_3_DEFAULT_GC_11_0_1);
5063                         break;
5064                 default:
5065                         break;
5066                 }
5067         }
5068 }
5069
5070 static void gfx_v11_cntl_pg(struct amdgpu_device *adev, bool enable)
5071 {
5072         amdgpu_gfx_rlc_enter_safe_mode(adev);
5073
5074         gfx_v11_cntl_power_gating(adev, enable);
5075
5076         amdgpu_gfx_rlc_exit_safe_mode(adev);
5077 }
5078
5079 static int gfx_v11_0_set_powergating_state(void *handle,
5080                                            enum amd_powergating_state state)
5081 {
5082         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5083         bool enable = (state == AMD_PG_STATE_GATE);
5084
5085         if (amdgpu_sriov_vf(adev))
5086                 return 0;
5087
5088         switch (adev->ip_versions[GC_HWIP][0]) {
5089         case IP_VERSION(11, 0, 0):
5090         case IP_VERSION(11, 0, 2):
5091         case IP_VERSION(11, 0, 3):
5092                 amdgpu_gfx_off_ctrl(adev, enable);
5093                 break;
5094         case IP_VERSION(11, 0, 1):
5095         case IP_VERSION(11, 0, 4):
5096                 gfx_v11_cntl_pg(adev, enable);
5097                 amdgpu_gfx_off_ctrl(adev, enable);
5098                 break;
5099         default:
5100                 break;
5101         }
5102
5103         return 0;
5104 }
5105
5106 static int gfx_v11_0_set_clockgating_state(void *handle,
5107                                           enum amd_clockgating_state state)
5108 {
5109         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5110
5111         if (amdgpu_sriov_vf(adev))
5112                 return 0;
5113
5114         switch (adev->ip_versions[GC_HWIP][0]) {
5115         case IP_VERSION(11, 0, 0):
5116         case IP_VERSION(11, 0, 1):
5117         case IP_VERSION(11, 0, 2):
5118         case IP_VERSION(11, 0, 3):
5119         case IP_VERSION(11, 0, 4):
5120                 gfx_v11_0_update_gfx_clock_gating(adev,
5121                                 state == AMD_CG_STATE_GATE);
5122                 break;
5123         default:
5124                 break;
5125         }
5126
5127         return 0;
5128 }
5129
5130 static void gfx_v11_0_get_clockgating_state(void *handle, u64 *flags)
5131 {
5132         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5133         int data;
5134
5135         /* AMD_CG_SUPPORT_GFX_MGCG */
5136         data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE);
5137         if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK))
5138                 *flags |= AMD_CG_SUPPORT_GFX_MGCG;
5139
5140         /* AMD_CG_SUPPORT_REPEATER_FGCG */
5141         if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_REPEATER_FGCG_OVERRIDE_MASK))
5142                 *flags |= AMD_CG_SUPPORT_REPEATER_FGCG;
5143
5144         /* AMD_CG_SUPPORT_GFX_FGCG */
5145         if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_FGCG_OVERRIDE_MASK))
5146                 *flags |= AMD_CG_SUPPORT_GFX_FGCG;
5147
5148         /* AMD_CG_SUPPORT_GFX_PERF_CLK */
5149         if (!(data & RLC_CGTT_MGCG_OVERRIDE__PERFMON_CLOCK_STATE_MASK))
5150                 *flags |= AMD_CG_SUPPORT_GFX_PERF_CLK;
5151
5152         /* AMD_CG_SUPPORT_GFX_CGCG */
5153         data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL);
5154         if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
5155                 *flags |= AMD_CG_SUPPORT_GFX_CGCG;
5156
5157         /* AMD_CG_SUPPORT_GFX_CGLS */
5158         if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
5159                 *flags |= AMD_CG_SUPPORT_GFX_CGLS;
5160
5161         /* AMD_CG_SUPPORT_GFX_3D_CGCG */
5162         data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL_3D);
5163         if (data & RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK)
5164                 *flags |= AMD_CG_SUPPORT_GFX_3D_CGCG;
5165
5166         /* AMD_CG_SUPPORT_GFX_3D_CGLS */
5167         if (data & RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK)
5168                 *flags |= AMD_CG_SUPPORT_GFX_3D_CGLS;
5169 }
5170
5171 static u64 gfx_v11_0_ring_get_rptr_gfx(struct amdgpu_ring *ring)
5172 {
5173         /* gfx11 is 32bit rptr */
5174         return *(uint32_t *)ring->rptr_cpu_addr;
5175 }
5176
5177 static u64 gfx_v11_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
5178 {
5179         struct amdgpu_device *adev = ring->adev;
5180         u64 wptr;
5181
5182         /* XXX check if swapping is necessary on BE */
5183         if (ring->use_doorbell) {
5184                 wptr = atomic64_read((atomic64_t *)ring->wptr_cpu_addr);
5185         } else {
5186                 wptr = RREG32_SOC15(GC, 0, regCP_RB0_WPTR);
5187                 wptr += (u64)RREG32_SOC15(GC, 0, regCP_RB0_WPTR_HI) << 32;
5188         }
5189
5190         return wptr;
5191 }
5192
5193 static void gfx_v11_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
5194 {
5195         struct amdgpu_device *adev = ring->adev;
5196         uint32_t *wptr_saved;
5197         uint32_t *is_queue_unmap;
5198         uint64_t aggregated_db_index;
5199         uint32_t mqd_size = adev->mqds[AMDGPU_HW_IP_GFX].mqd_size;
5200         uint64_t wptr_tmp;
5201
5202         if (ring->is_mes_queue) {
5203                 wptr_saved = (uint32_t *)(ring->mqd_ptr + mqd_size);
5204                 is_queue_unmap = (uint32_t *)(ring->mqd_ptr + mqd_size +
5205                                               sizeof(uint32_t));
5206                 aggregated_db_index =
5207                         amdgpu_mes_get_aggregated_doorbell_index(adev,
5208                                                                  ring->hw_prio);
5209
5210                 wptr_tmp = ring->wptr & ring->buf_mask;
5211                 atomic64_set((atomic64_t *)ring->wptr_cpu_addr, wptr_tmp);
5212                 *wptr_saved = wptr_tmp;
5213                 /* assume the doorbell is always used by the MES-mapped queue */
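                /*
                 * The aggregated doorbell is shared per hw priority level and
                 * is monitored by MES itself; ringing it in addition to the
                 * queue's own doorbell when the queue has been unmapped is
                 * presumably what lets MES notice the pending work and map the
                 * queue back in.  The second *is_queue_unmap check below
                 * re-reads the flag after the queue doorbell write, seemingly
                 * to catch an unmap that races with the first check.
                 */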
5214                 if (*is_queue_unmap) {
5215                         WDOORBELL64(aggregated_db_index, wptr_tmp);
5216                         WDOORBELL64(ring->doorbell_index, wptr_tmp);
5217                 } else {
5218                         WDOORBELL64(ring->doorbell_index, wptr_tmp);
5219
5220                         if (*is_queue_unmap)
5221                                 WDOORBELL64(aggregated_db_index, wptr_tmp);
5222                 }
5223         } else {
5224                 if (ring->use_doorbell) {
5225                         /* XXX check if swapping is necessary on BE */
5226                         atomic64_set((atomic64_t *)ring->wptr_cpu_addr,
5227                                      ring->wptr);
5228                         WDOORBELL64(ring->doorbell_index, ring->wptr);
5229                 } else {
5230                         WREG32_SOC15(GC, 0, regCP_RB0_WPTR,
5231                                      lower_32_bits(ring->wptr));
5232                         WREG32_SOC15(GC, 0, regCP_RB0_WPTR_HI,
5233                                      upper_32_bits(ring->wptr));
5234                 }
5235         }
5236 }
5237
5238 static u64 gfx_v11_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
5239 {
5240         /* gfx11 hardware is 32bit rptr */
5241         return *(uint32_t *)ring->rptr_cpu_addr;
5242 }
5243
5244 static u64 gfx_v11_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
5245 {
5246         u64 wptr;
5247
5248         /* XXX check if swapping is necessary on BE */
5249         if (ring->use_doorbell)
5250                 wptr = atomic64_read((atomic64_t *)ring->wptr_cpu_addr);
5251         else
5252                 BUG();
5253         return wptr;
5254 }
5255
5256 static void gfx_v11_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
5257 {
5258         struct amdgpu_device *adev = ring->adev;
5259         uint32_t *wptr_saved;
5260         uint32_t *is_queue_unmap;
5261         uint64_t aggregated_db_index;
5262         uint32_t mqd_size = adev->mqds[AMDGPU_HW_IP_COMPUTE].mqd_size;
5263         uint64_t wptr_tmp;
5264
5265         if (ring->is_mes_queue) {
5266                 wptr_saved = (uint32_t *)(ring->mqd_ptr + mqd_size);
5267                 is_queue_unmap = (uint32_t *)(ring->mqd_ptr + mqd_size +
5268                                               sizeof(uint32_t));
5269                 aggregated_db_index =
5270                         amdgpu_mes_get_aggregated_doorbell_index(adev,
5271                                                                  ring->hw_prio);
5272
5273                 wptr_tmp = ring->wptr & ring->buf_mask;
5274                 atomic64_set((atomic64_t *)ring->wptr_cpu_addr, wptr_tmp);
5275                 *wptr_saved = wptr_tmp;
5276                 /* assume the doorbell is always used by the MES-mapped queue */
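                /*
                 * Same aggregated-doorbell handling as in
                 * gfx_v11_0_ring_set_wptr_gfx(): when MES has unmapped the
                 * queue, the aggregated doorbell is rung as well so MES can
                 * notice the pending work.
                 */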
5277                 if (*is_queue_unmap) {
5278                         WDOORBELL64(aggregated_db_index, wptr_tmp);
5279                         WDOORBELL64(ring->doorbell_index, wptr_tmp);
5280                 } else {
5281                         WDOORBELL64(ring->doorbell_index, wptr_tmp);
5282
5283                         if (*is_queue_unmap)
5284                                 WDOORBELL64(aggregated_db_index, wptr_tmp);
5285                 }
5286         } else {
5287                 /* XXX check if swapping is necessary on BE */
5288                 if (ring->use_doorbell) {
5289                         atomic64_set((atomic64_t *)ring->wptr_cpu_addr,
5290                                      ring->wptr);
5291                         WDOORBELL64(ring->doorbell_index, ring->wptr);
5292                 } else {
5293                         BUG(); /* only DOORBELL method supported on gfx11 now */
5294                 }
5295         }
5296 }
5297
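/*
 * HDP flush from the ring: the WAIT_REG_MEM emitted by
 * gfx_v11_0_wait_reg_mem() both writes ref_and_mask to the NBIO HDP flush
 * request register and polls the flush done register until the matching bit
 * is set; each CP engine/pipe owns one bit in those registers (cp0 for the
 * gfx ring, cp2/cp6 shifted by the pipe for the two MECs).
 */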
5298 static void gfx_v11_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
5299 {
5300         struct amdgpu_device *adev = ring->adev;
5301         u32 ref_and_mask, reg_mem_engine;
5302         const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio.hdp_flush_reg;
5303
5304         if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
5305                 switch (ring->me) {
5306                 case 1:
5307                         ref_and_mask = nbio_hf_reg->ref_and_mask_cp2 << ring->pipe;
5308                         break;
5309                 case 2:
5310                         ref_and_mask = nbio_hf_reg->ref_and_mask_cp6 << ring->pipe;
5311                         break;
5312                 default:
5313                         return;
5314                 }
5315                 reg_mem_engine = 0;
5316         } else {
5317                 ref_and_mask = nbio_hf_reg->ref_and_mask_cp0;
5318                 reg_mem_engine = 1; /* pfp */
5319         }
5320
5321         gfx_v11_0_wait_reg_mem(ring, reg_mem_engine, 0, 1,
5322                                adev->nbio.funcs->get_hdp_flush_req_offset(adev),
5323                                adev->nbio.funcs->get_hdp_flush_done_offset(adev),
5324                                ref_and_mask, ref_and_mask, 0x20);
5325 }
5326
5327 static void gfx_v11_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
5328                                        struct amdgpu_job *job,
5329                                        struct amdgpu_ib *ib,
5330                                        uint32_t flags)
5331 {
5332         unsigned vmid = AMDGPU_JOB_GET_VMID(job);
5333         u32 header, control = 0;
5334
5335         BUG_ON(ib->flags & AMDGPU_IB_FLAG_CE);
5336
5337         header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
5338
5339         control |= ib->length_dw | (vmid << 24);
5340
5341         if (amdgpu_mcbp && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) {
5342                 control |= INDIRECT_BUFFER_PRE_ENB(1);
5343
5344                 if (flags & AMDGPU_IB_PREEMPTED)
5345                         control |= INDIRECT_BUFFER_PRE_RESUME(1);
5346
5347                 if (vmid)
5348                         gfx_v11_0_ring_emit_de_meta(ring,
5349                                     !amdgpu_sriov_vf(ring->adev) && (flags & AMDGPU_IB_PREEMPTED));
5350         }
5351
5352         if (ring->is_mes_queue)
5353                 /* inherit vmid from mqd */
5354                 control |= 0x400000;
5355
5356         amdgpu_ring_write(ring, header);
5357         BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
5358         amdgpu_ring_write(ring,
5359 #ifdef __BIG_ENDIAN
5360                 (2 << 0) |
5361 #endif
5362                 lower_32_bits(ib->gpu_addr));
5363         amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
5364         amdgpu_ring_write(ring, control);
5365 }
5366
5367 static void gfx_v11_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
5368                                            struct amdgpu_job *job,
5369                                            struct amdgpu_ib *ib,
5370                                            uint32_t flags)
5371 {
5372         unsigned vmid = AMDGPU_JOB_GET_VMID(job);
5373         u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);
5374
5375         if (ring->is_mes_queue)
5376                 /* inherit vmid from mqd */
5377                 control |= 0x40000000;
5378
5379         /* Currently, there is a high probability of a wave ID mismatch
5380          * between ME and GDS, leading to a HW deadlock, because ME generates
5381          * different wave IDs than the GDS expects. This situation happens
5382          * randomly when at least 5 compute pipes use GDS ordered append.
5383          * The wave IDs generated by ME are also wrong after suspend/resume.
5384          * Those are probably bugs somewhere else in the kernel driver.
5385          *
5386          * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and
5387          * GDS to 0 for this ring (me/pipe).
5388          */
5389         if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) {
5390                 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
5391                 amdgpu_ring_write(ring, regGDS_COMPUTE_MAX_WAVE_ID);
5392                 amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id);
5393         }
5394
5395         amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
5396         BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
5397         amdgpu_ring_write(ring,
5398 #ifdef __BIG_ENDIAN
5399                                 (2 << 0) |
5400 #endif
5401                                 lower_32_bits(ib->gpu_addr));
5402         amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
5403         amdgpu_ring_write(ring, control);
5404 }
5405
5406 static void gfx_v11_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
5407                                      u64 seq, unsigned flags)
5408 {
5409         bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
5410         bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
5411
5412         /* RELEASE_MEM - flush caches, send int */
5413         amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6));
5414         amdgpu_ring_write(ring, (PACKET3_RELEASE_MEM_GCR_SEQ |
5415                                  PACKET3_RELEASE_MEM_GCR_GL2_WB |
5416                                  PACKET3_RELEASE_MEM_GCR_GL2_INV |
5417                                  PACKET3_RELEASE_MEM_GCR_GL2_US |
5418                                  PACKET3_RELEASE_MEM_GCR_GL1_INV |
5419                                  PACKET3_RELEASE_MEM_GCR_GLV_INV |
5420                                  PACKET3_RELEASE_MEM_GCR_GLM_INV |
5421                                  PACKET3_RELEASE_MEM_GCR_GLM_WB |
5422                                  PACKET3_RELEASE_MEM_CACHE_POLICY(3) |
5423                                  PACKET3_RELEASE_MEM_EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
5424                                  PACKET3_RELEASE_MEM_EVENT_INDEX(5)));
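        /*
         * DATA_SEL 2 writes the full 64-bit seq value, DATA_SEL 1 only the
         * low 32 bits; INT_SEL 2 raises the EOP interrupt once the write has
         * completed; these are the same RELEASE_MEM encodings amdgpu uses on
         * earlier gfx generations.
         */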
5425         amdgpu_ring_write(ring, (PACKET3_RELEASE_MEM_DATA_SEL(write64bit ? 2 : 1) |
5426                                  PACKET3_RELEASE_MEM_INT_SEL(int_sel ? 2 : 0)));
5427
5428         /*
5429          * the address should be Qword aligned for a 64bit write, and Dword
5430          * aligned if only the low 32 bits of data are sent (data high is discarded)
5431          */
5432         if (write64bit)
5433                 BUG_ON(addr & 0x7);
5434         else
5435                 BUG_ON(addr & 0x3);
5436         amdgpu_ring_write(ring, lower_32_bits(addr));
5437         amdgpu_ring_write(ring, upper_32_bits(addr));
5438         amdgpu_ring_write(ring, lower_32_bits(seq));
5439         amdgpu_ring_write(ring, upper_32_bits(seq));
5440         amdgpu_ring_write(ring, ring->is_mes_queue ?
5441                          (ring->hw_queue_id | AMDGPU_FENCE_MES_QUEUE_FLAG) : 0);
5442 }
5443
5444 static void gfx_v11_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
5445 {
5446         int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
5447         uint32_t seq = ring->fence_drv.sync_seq;
5448         uint64_t addr = ring->fence_drv.gpu_addr;
5449
5450         gfx_v11_0_wait_reg_mem(ring, usepfp, 1, 0, lower_32_bits(addr),
5451                                upper_32_bits(addr), seq, 0xffffffff, 4);
5452 }
5453
5454 static void gfx_v11_0_ring_invalidate_tlbs(struct amdgpu_ring *ring,
5455                                    uint16_t pasid, uint32_t flush_type,
5456                                    bool all_hub, uint8_t dst_sel)
5457 {
5458         amdgpu_ring_write(ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0));
5459         amdgpu_ring_write(ring,
5460                           PACKET3_INVALIDATE_TLBS_DST_SEL(dst_sel) |
5461                           PACKET3_INVALIDATE_TLBS_ALL_HUB(all_hub) |
5462                           PACKET3_INVALIDATE_TLBS_PASID(pasid) |
5463                           PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type));
5464 }
5465
5466 static void gfx_v11_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
5467                                          unsigned vmid, uint64_t pd_addr)
5468 {
5469         if (ring->is_mes_queue)
5470                 gfx_v11_0_ring_invalidate_tlbs(ring, 0, 0, false, 0);
5471         else
5472                 amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
5473
5474         /* compute doesn't have PFP */
5475         if (ring->funcs->type == AMDGPU_RING_TYPE_GFX) {
5476                 /* sync PFP to ME, otherwise we might get invalid PFP reads */
5477                 amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
5478                 amdgpu_ring_write(ring, 0x0);
5479         }
5480 }
5481
5482 static void gfx_v11_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
5483                                           u64 seq, unsigned int flags)
5484 {
5485         struct amdgpu_device *adev = ring->adev;
5486
5487         /* we only allocate 32bit for each seq wb address */
5488         BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
5489
5490         /* write fence seq to the "addr" */
5491         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5492         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5493                                  WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
5494         amdgpu_ring_write(ring, lower_32_bits(addr));
5495         amdgpu_ring_write(ring, upper_32_bits(addr));
5496         amdgpu_ring_write(ring, lower_32_bits(seq));
5497
5498         if (flags & AMDGPU_FENCE_FLAG_INT) {
5499                 /* set register to trigger INT */
5500                 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5501                 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5502                                          WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
5503                 amdgpu_ring_write(ring, SOC15_REG_OFFSET(GC, 0, regCPC_INT_STATUS));
5504                 amdgpu_ring_write(ring, 0);
5505                 amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
5506         }
5507 }
5508
5509 static void gfx_v11_0_ring_emit_cntxcntl(struct amdgpu_ring *ring,
5510                                          uint32_t flags)
5511 {
5512         uint32_t dw2 = 0;
5513
5514         dw2 |= 0x80000000; /* set load_enable, otherwise this packet is just NOPs */
5515         if (flags & AMDGPU_HAVE_CTX_SWITCH) {
5516                 /* set load_global_config & load_global_uconfig */
5517                 dw2 |= 0x8001;
5518                 /* set load_cs_sh_regs */
5519                 dw2 |= 0x01000000;
5520                 /* set load_per_context_state & load_gfx_sh_regs for GFX */
5521                 dw2 |= 0x10002;
5522         }
5523
5524         amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
5525         amdgpu_ring_write(ring, dw2);
5526         amdgpu_ring_write(ring, 0);
5527 }
5528
5529 static unsigned gfx_v11_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring)
5530 {
5531         unsigned ret;
5532
5533         amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
5534         amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
5535         amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
5536         amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exec_gpu_addr==0 */
5537         ret = ring->wptr & ring->buf_mask;
5538         amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */
5539
5540         return ret;
5541 }
5542
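/*
 * gfx_v11_0_ring_emit_init_cond_exec() above emits a COND_EXEC packet whose
 * dword-count field holds the 0x55aa55aa placeholder and returns the ring
 * offset of that placeholder; once the preemptible part of the frame has been
 * emitted, the function below patches the placeholder with the number of
 * dwords that follow it, so the CP skips exactly that range when
 * *cond_exe_gpu_addr reads back as 0 (e.g. a placeholder at ring offset 100
 * with wptr now at 132 gets patched to 31).
 */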
5543 static void gfx_v11_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset)
5544 {
5545         unsigned cur;
5546         BUG_ON(offset > ring->buf_mask);
5547         BUG_ON(ring->ring[offset] != 0x55aa55aa);
5548
5549         cur = (ring->wptr - 1) & ring->buf_mask;
5550         if (likely(cur > offset))
5551                 ring->ring[offset] = cur - offset;
5552         else
5553                 ring->ring[offset] = (ring->buf_mask + 1) - offset + cur;
5554 }
5555
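/*
 * Gfx queue preemption (MCBP) flow: clear the ring's cond_exec flag so the CP
 * will skip the preemptible IB section, ask the KIQ to preempt the queue
 * (PREEMPT_QUEUES_NO_UNMAP) while emitting a trailing fence, busy-wait up to
 * adev->usec_timeout microseconds for that fence to signal, then restore
 * cond_exec so later frames execute normally.
 */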
5556 static int gfx_v11_0_ring_preempt_ib(struct amdgpu_ring *ring)
5557 {
5558         int i, r = 0;
5559         struct amdgpu_device *adev = ring->adev;
5560         struct amdgpu_kiq *kiq = &adev->gfx.kiq;
5561         struct amdgpu_ring *kiq_ring = &kiq->ring;
5562         unsigned long flags;
5563
5564         if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues)
5565                 return -EINVAL;
5566
5567         spin_lock_irqsave(&kiq->ring_lock, flags);
5568
5569         if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size)) {
5570                 spin_unlock_irqrestore(&kiq->ring_lock, flags);
5571                 return -ENOMEM;
5572         }
5573
5574         /* assert preemption condition */
5575         amdgpu_ring_set_preempt_cond_exec(ring, false);
5576
5577         /* assert IB preemption, emit the trailing fence */
5578         kiq->pmf->kiq_unmap_queues(kiq_ring, ring, PREEMPT_QUEUES_NO_UNMAP,
5579                                    ring->trail_fence_gpu_addr,
5580                                    ++ring->trail_seq);
5581         amdgpu_ring_commit(kiq_ring);
5582
5583         spin_unlock_irqrestore(&kiq->ring_lock, flags);
5584
5585         /* poll the trailing fence */
5586         for (i = 0; i < adev->usec_timeout; i++) {
5587                 if (ring->trail_seq ==
5588                     le32_to_cpu(*(ring->trail_fence_cpu_addr)))
5589                         break;
5590                 udelay(1);
5591         }
5592
5593         if (i >= adev->usec_timeout) {
5594                 r = -EINVAL;
5595                 DRM_ERROR("ring %d failed to preempt ib\n", ring->idx);
5596         }
5597
5598         /* deassert preemption condition */
5599         amdgpu_ring_set_preempt_cond_exec(ring, true);
5600         return r;
5601 }
5602
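/*
 * The DE metadata payload lives in the CSA (or in the MES context for MES
 * queues) and is part of the state handled around mid-command-buffer
 * preemption; on resume the previously saved payload is replayed from its
 * CPU-visible copy instead of the freshly initialized local one.
 */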
5603 static void gfx_v11_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume)
5604 {
5605         struct amdgpu_device *adev = ring->adev;
5606         struct v10_de_ib_state de_payload = {0};
5607         uint64_t offset, gds_addr, de_payload_gpu_addr;
5608         void *de_payload_cpu_addr;
5609         int cnt;
5610
5611         if (ring->is_mes_queue) {
5612                 offset = offsetof(struct amdgpu_mes_ctx_meta_data,
5613                                   gfx[0].gfx_meta_data) +
5614                         offsetof(struct v10_gfx_meta_data, de_payload);
5615                 de_payload_gpu_addr =
5616                         amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
5617                 de_payload_cpu_addr =
5618                         amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset);
5619
5620                 offset = offsetof(struct amdgpu_mes_ctx_meta_data,
5621                                   gfx[0].gds_backup) +
5622                         offsetof(struct v10_gfx_meta_data, de_payload);
5623                 gds_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
5624         } else {
5625                 offset = offsetof(struct v10_gfx_meta_data, de_payload);
5626                 de_payload_gpu_addr = amdgpu_csa_vaddr(ring->adev) + offset;
5627                 de_payload_cpu_addr = adev->virt.csa_cpu_addr + offset;
5628
5629                 gds_addr = ALIGN(amdgpu_csa_vaddr(ring->adev) +
5630                                  AMDGPU_CSA_SIZE - adev->gds.gds_size,
5631                                  PAGE_SIZE);
5632         }
5633
5634         de_payload.gds_backup_addrlo = lower_32_bits(gds_addr);
5635         de_payload.gds_backup_addrhi = upper_32_bits(gds_addr);
5636
5637         cnt = (sizeof(de_payload) >> 2) + 4 - 2;
5638         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
5639         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
5640                                  WRITE_DATA_DST_SEL(8) |
5641                                  WR_CONFIRM) |
5642                                  WRITE_DATA_CACHE_POLICY(0));
5643         amdgpu_ring_write(ring, lower_32_bits(de_payload_gpu_addr));
5644         amdgpu_ring_write(ring, upper_32_bits(de_payload_gpu_addr));
5645
5646         if (resume)
5647                 amdgpu_ring_write_multiple(ring, de_payload_cpu_addr,
5648                                            sizeof(de_payload) >> 2);
5649         else
5650                 amdgpu_ring_write_multiple(ring, (void *)&de_payload,
5651                                            sizeof(de_payload) >> 2);
5652 }
5653
5654 static void gfx_v11_0_ring_emit_frame_cntl(struct amdgpu_ring *ring, bool start,
5655                                     bool secure)
5656 {
5657         uint32_t v = secure ? FRAME_TMZ : 0;
5658
5659         amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0));
5660         amdgpu_ring_write(ring, v | FRAME_CMD(start ? 0 : 1));
5661 }
5662
5663 static void gfx_v11_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg,
5664                                      uint32_t reg_val_offs)
5665 {
5666         struct amdgpu_device *adev = ring->adev;
5667
5668         amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
5669         amdgpu_ring_write(ring, 0 |     /* src: register */
5670                                 (5 << 8) |      /* dst: memory */
5671                                 (1 << 20));     /* write confirm */
5672         amdgpu_ring_write(ring, reg);
5673         amdgpu_ring_write(ring, 0);
5674         amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
5675                                 reg_val_offs * 4));
5676         amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
5677                                 reg_val_offs * 4));
5678 }
5679
5680 static void gfx_v11_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
5681                                    uint32_t val)
5682 {
5683         uint32_t cmd = 0;
5684
5685         switch (ring->funcs->type) {
5686         case AMDGPU_RING_TYPE_GFX:
5687                 cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM;
5688                 break;
5689         case AMDGPU_RING_TYPE_KIQ:
5690                 cmd = (1 << 16); /* no inc addr */
5691                 break;
5692         default:
5693                 cmd = WR_CONFIRM;
5694                 break;
5695         }
5696         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5697         amdgpu_ring_write(ring, cmd);
5698         amdgpu_ring_write(ring, reg);
5699         amdgpu_ring_write(ring, 0);
5700         amdgpu_ring_write(ring, val);
5701 }
5702
5703 static void gfx_v11_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
5704                                         uint32_t val, uint32_t mask)
5705 {
5706         gfx_v11_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20);
5707 }
5708
5709 static void gfx_v11_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
5710                                                    uint32_t reg0, uint32_t reg1,
5711                                                    uint32_t ref, uint32_t mask)
5712 {
5713         int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
5714
5715         gfx_v11_0_wait_reg_mem(ring, usepfp, 0, 1, reg0, reg1,
5716                                ref, mask, 0x20);
5717 }
5718
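/*
 * Soft recovery: ask the SQ to kill the waves belonging to the hung VMID
 * (SQ_CMD CMD 0x03 with CHECK_VMID set limits the kill to that VMID), as a
 * lighter-weight recovery attempt before a full GPU reset.
 */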
5719 static void gfx_v11_0_ring_soft_recovery(struct amdgpu_ring *ring,
5720                                          unsigned vmid)
5721 {
5722         struct amdgpu_device *adev = ring->adev;
5723         uint32_t value = 0;
5724
5725         value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03);
5726         value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01);
5727         value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1);
5728         value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid);
5729         WREG32_SOC15(GC, 0, regSQ_CMD, value);
5730 }
5731
5732 static void
5733 gfx_v11_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
5734                                       uint32_t me, uint32_t pipe,
5735                                       enum amdgpu_interrupt_state state)
5736 {
5737         uint32_t cp_int_cntl, cp_int_cntl_reg;
5738
5739         if (!me) {
5740                 switch (pipe) {
5741                 case 0:
5742                         cp_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, regCP_INT_CNTL_RING0);
5743                         break;
5744                 case 1:
5745                         cp_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, regCP_INT_CNTL_RING1);
5746                         break;
5747                 default:
5748                         DRM_DEBUG("invalid pipe %d\n", pipe);
5749                         return;
5750                 }
5751         } else {
5752                 DRM_DEBUG("invalid me %d\n", me);
5753                 return;
5754         }
5755
5756         switch (state) {
5757         case AMDGPU_IRQ_STATE_DISABLE:
5758                 cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg);
5759                 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
5760                                             TIME_STAMP_INT_ENABLE, 0);
5761                 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
5762                                             GENERIC0_INT_ENABLE, 0);
5763                 WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl);
5764                 break;
5765         case AMDGPU_IRQ_STATE_ENABLE:
5766                 cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg);
5767                 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
5768                                             TIME_STAMP_INT_ENABLE, 1);
5769                 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
5770                                             GENERIC0_INT_ENABLE, 1);
5771                 WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl);
5772                 break;
5773         default:
5774                 break;
5775         }
5776 }
5777
5778 static void gfx_v11_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
5779                                                      int me, int pipe,
5780                                                      enum amdgpu_interrupt_state state)
5781 {
5782         u32 mec_int_cntl, mec_int_cntl_reg;
5783
5784         /*
5785          * amdgpu controls only the first MEC. That's why this function only
5786          * handles the setting of interrupts for this specific MEC. All other
5787          * pipes' interrupts are set by amdkfd.
5788          */
5789
5790         if (me == 1) {
5791                 switch (pipe) {
5792                 case 0:
5793                         mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE0_INT_CNTL);
5794                         break;
5795                 case 1:
5796                         mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE1_INT_CNTL);
5797                         break;
5798                 case 2:
5799                         mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE2_INT_CNTL);
5800                         break;
5801                 case 3:
5802                         mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE3_INT_CNTL);
5803                         break;
5804                 default:
5805                         DRM_DEBUG("invalid pipe %d\n", pipe);
5806                         return;
5807                 }
5808         } else {
5809                 DRM_DEBUG("invalid me %d\n", me);
5810                 return;
5811         }
5812
5813         switch (state) {
5814         case AMDGPU_IRQ_STATE_DISABLE:
5815                 mec_int_cntl = RREG32_SOC15_IP(GC, mec_int_cntl_reg);
5816                 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
5817                                              TIME_STAMP_INT_ENABLE, 0);
5818                 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
5819                                              GENERIC0_INT_ENABLE, 0);
5820                 WREG32_SOC15_IP(GC, mec_int_cntl_reg, mec_int_cntl);
5821                 break;
5822         case AMDGPU_IRQ_STATE_ENABLE:
5823                 mec_int_cntl = RREG32_SOC15_IP(GC, mec_int_cntl_reg);
5824                 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
5825                                              TIME_STAMP_INT_ENABLE, 1);
5826                 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
5827                                              GENERIC0_INT_ENABLE, 1);
5828                 WREG32_SOC15_IP(GC, mec_int_cntl_reg, mec_int_cntl);
5829                 break;
5830         default:
5831                 break;
5832         }
5833 }
5834
5835 static int gfx_v11_0_set_eop_interrupt_state(struct amdgpu_device *adev,
5836                                             struct amdgpu_irq_src *src,
5837                                             unsigned type,
5838                                             enum amdgpu_interrupt_state state)
5839 {
5840         switch (type) {
5841         case AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP:
5842                 gfx_v11_0_set_gfx_eop_interrupt_state(adev, 0, 0, state);
5843                 break;
5844         case AMDGPU_CP_IRQ_GFX_ME0_PIPE1_EOP:
5845                 gfx_v11_0_set_gfx_eop_interrupt_state(adev, 0, 1, state);
5846                 break;
5847         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
5848                 gfx_v11_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
5849                 break;
5850         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
5851                 gfx_v11_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
5852                 break;
5853         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
5854                 gfx_v11_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
5855                 break;
5856         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
5857                 gfx_v11_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
5858                 break;
5859         default:
5860                 break;
5861         }
5862         return 0;
5863 }
5864
5865 static int gfx_v11_0_eop_irq(struct amdgpu_device *adev,
5866                              struct amdgpu_irq_src *source,
5867                              struct amdgpu_iv_entry *entry)
5868 {
5869         int i;
5870         u8 me_id, pipe_id, queue_id;
5871         struct amdgpu_ring *ring;
5872         uint32_t mes_queue_id = entry->src_data[0];
5873
5874         DRM_DEBUG("IH: CP EOP\n");
5875
5876         if (adev->enable_mes && (mes_queue_id & AMDGPU_FENCE_MES_QUEUE_FLAG)) {
5877                 struct amdgpu_mes_queue *queue;
5878
5879                 mes_queue_id &= AMDGPU_FENCE_MES_QUEUE_ID_MASK;
5880
5881                 spin_lock(&adev->mes.queue_id_lock);
5882                 queue = idr_find(&adev->mes.queue_id_idr, mes_queue_id);
5883                 if (queue) {
5884                         DRM_DEBUG("process mes queue id = %d\n", mes_queue_id);
5885                         amdgpu_fence_process(queue->ring);
5886                 }
5887                 spin_unlock(&adev->mes.queue_id_lock);
5888         } else {
5889                 me_id = (entry->ring_id & 0x0c) >> 2;
5890                 pipe_id = (entry->ring_id & 0x03) >> 0;
5891                 queue_id = (entry->ring_id & 0x70) >> 4;
5892
5893                 switch (me_id) {
5894                 case 0:
5895                         if (pipe_id == 0)
5896                                 amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
5897                         else
5898                                 amdgpu_fence_process(&adev->gfx.gfx_ring[1]);
5899                         break;
5900                 case 1:
5901                 case 2:
5902                         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5903                                 ring = &adev->gfx.compute_ring[i];
5904                                 /* Per-queue interrupt is supported for MEC starting from VI.
5905                                  * The interrupt can only be enabled/disabled per pipe instead
5906                                  * of per queue.
5907                                  */
5908                                 if ((ring->me == me_id) &&
5909                                     (ring->pipe == pipe_id) &&
5910                                     (ring->queue == queue_id))
5911                                         amdgpu_fence_process(ring);
5912                         }
5913                         break;
5914                 }
5915         }
5916
5917         return 0;
5918 }
5919
5920 static int gfx_v11_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
5921                                               struct amdgpu_irq_src *source,
5922                                               unsigned type,
5923                                               enum amdgpu_interrupt_state state)
5924 {
5925         switch (state) {
5926         case AMDGPU_IRQ_STATE_DISABLE:
5927         case AMDGPU_IRQ_STATE_ENABLE:
5928                 WREG32_FIELD15_PREREG(GC, 0, CP_INT_CNTL_RING0,
5929                                PRIV_REG_INT_ENABLE,
5930                                state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
5931                 break;
5932         default:
5933                 break;
5934         }
5935
5936         return 0;
5937 }
5938
5939 static int gfx_v11_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
5940                                                struct amdgpu_irq_src *source,
5941                                                unsigned type,
5942                                                enum amdgpu_interrupt_state state)
5943 {
5944         switch (state) {
5945         case AMDGPU_IRQ_STATE_DISABLE:
5946         case AMDGPU_IRQ_STATE_ENABLE:
5947                 WREG32_FIELD15_PREREG(GC, 0, CP_INT_CNTL_RING0,
5948                                PRIV_INSTR_INT_ENABLE,
5949                                state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
5950                 break;
5951         default:
5952                 break;
5953         }
5954
5955         return 0;
5956 }
5957
5958 static void gfx_v11_0_handle_priv_fault(struct amdgpu_device *adev,
5959                                         struct amdgpu_iv_entry *entry)
5960 {
5961         u8 me_id, pipe_id, queue_id;
5962         struct amdgpu_ring *ring;
5963         int i;
5964
5965         me_id = (entry->ring_id & 0x0c) >> 2;
5966         pipe_id = (entry->ring_id & 0x03) >> 0;
5967         queue_id = (entry->ring_id & 0x70) >> 4;
5968
5969         switch (me_id) {
5970         case 0:
5971                 for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
5972                         ring = &adev->gfx.gfx_ring[i];
5973                         /* we only enabled 1 gfx queue per pipe for now */
5974                         if (ring->me == me_id && ring->pipe == pipe_id)
5975                                 drm_sched_fault(&ring->sched);
5976                 }
5977                 break;
5978         case 1:
5979         case 2:
5980                 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5981                         ring = &adev->gfx.compute_ring[i];
5982                         if (ring->me == me_id && ring->pipe == pipe_id &&
5983                             ring->queue == queue_id)
5984                                 drm_sched_fault(&ring->sched);
5985                 }
5986                 break;
5987         default:
5988                 BUG();
5989                 break;
5990         }
5991 }
5992
5993 static int gfx_v11_0_priv_reg_irq(struct amdgpu_device *adev,
5994                                   struct amdgpu_irq_src *source,
5995                                   struct amdgpu_iv_entry *entry)
5996 {
5997         DRM_ERROR("Illegal register access in command stream\n");
5998         gfx_v11_0_handle_priv_fault(adev, entry);
5999         return 0;
6000 }
6001
6002 static int gfx_v11_0_priv_inst_irq(struct amdgpu_device *adev,
6003                                    struct amdgpu_irq_src *source,
6004                                    struct amdgpu_iv_entry *entry)
6005 {
6006         DRM_ERROR("Illegal instruction in command stream\n");
6007         gfx_v11_0_handle_priv_fault(adev, entry);
6008         return 0;
6009 }
6010
6011 #if 0
6012 static int gfx_v11_0_kiq_set_interrupt_state(struct amdgpu_device *adev,
6013                                              struct amdgpu_irq_src *src,
6014                                              unsigned int type,
6015                                              enum amdgpu_interrupt_state state)
6016 {
6017         uint32_t tmp, target;
6018         struct amdgpu_ring *ring = &(adev->gfx.kiq.ring);
6019
6020         target = SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE0_INT_CNTL);
6021         target += ring->pipe;
6022
6023         switch (type) {
6024         case AMDGPU_CP_KIQ_IRQ_DRIVER0:
6025                 if (state == AMDGPU_IRQ_STATE_DISABLE) {
6026                         tmp = RREG32_SOC15(GC, 0, regCPC_INT_CNTL);
6027                         tmp = REG_SET_FIELD(tmp, CPC_INT_CNTL,
6028                                             GENERIC2_INT_ENABLE, 0);
6029                         WREG32_SOC15(GC, 0, regCPC_INT_CNTL, tmp);
6030
6031                         tmp = RREG32_SOC15_IP(GC, target);
6032                         tmp = REG_SET_FIELD(tmp, CP_ME1_PIPE0_INT_CNTL,
6033                                             GENERIC2_INT_ENABLE, 0);
6034                         WREG32_SOC15_IP(GC, target, tmp);
6035                 } else {
6036                         tmp = RREG32_SOC15(GC, 0, regCPC_INT_CNTL);
6037                         tmp = REG_SET_FIELD(tmp, CPC_INT_CNTL,
6038                                             GENERIC2_INT_ENABLE, 1);
6039                         WREG32_SOC15(GC, 0, regCPC_INT_CNTL, tmp);
6040
6041                         tmp = RREG32_SOC15_IP(GC, target);
6042                         tmp = REG_SET_FIELD(tmp, CP_ME1_PIPE0_INT_CNTL,
6043                                             GENERIC2_INT_ENABLE, 1);
6044                         WREG32_SOC15_IP(GC, target, tmp);
6045                 }
6046                 break;
6047         default:
6048                 BUG(); /* kiq only supports GENERIC2_INT now */
6049                 break;
6050         }
6051         return 0;
6052 }
6053 #endif
6054
6055 static void gfx_v11_0_emit_mem_sync(struct amdgpu_ring *ring)
6056 {
6057         const unsigned int gcr_cntl =
6058                         PACKET3_ACQUIRE_MEM_GCR_CNTL_GL2_INV(1) |
6059                         PACKET3_ACQUIRE_MEM_GCR_CNTL_GL2_WB(1) |
6060                         PACKET3_ACQUIRE_MEM_GCR_CNTL_GLM_INV(1) |
6061                         PACKET3_ACQUIRE_MEM_GCR_CNTL_GLM_WB(1) |
6062                         PACKET3_ACQUIRE_MEM_GCR_CNTL_GL1_INV(1) |
6063                         PACKET3_ACQUIRE_MEM_GCR_CNTL_GLV_INV(1) |
6064                         PACKET3_ACQUIRE_MEM_GCR_CNTL_GLK_INV(1) |
6065                         PACKET3_ACQUIRE_MEM_GCR_CNTL_GLI_INV(1);
6066
6067         /* ACQUIRE_MEM - make one or more surfaces valid for use by the subsequent operations */
6068         amdgpu_ring_write(ring, PACKET3(PACKET3_ACQUIRE_MEM, 6));
6069         amdgpu_ring_write(ring, 0); /* CP_COHER_CNTL */
6070         amdgpu_ring_write(ring, 0xffffffff);  /* CP_COHER_SIZE */
6071         amdgpu_ring_write(ring, 0xffffff);  /* CP_COHER_SIZE_HI */
6072         amdgpu_ring_write(ring, 0); /* CP_COHER_BASE */
6073         amdgpu_ring_write(ring, 0);  /* CP_COHER_BASE_HI */
6074         amdgpu_ring_write(ring, 0x0000000A); /* POLL_INTERVAL */
6075         amdgpu_ring_write(ring, gcr_cntl); /* GCR_CNTL */
6076 }
6077
6078 static const struct amd_ip_funcs gfx_v11_0_ip_funcs = {
6079         .name = "gfx_v11_0",
6080         .early_init = gfx_v11_0_early_init,
6081         .late_init = gfx_v11_0_late_init,
6082         .sw_init = gfx_v11_0_sw_init,
6083         .sw_fini = gfx_v11_0_sw_fini,
6084         .hw_init = gfx_v11_0_hw_init,
6085         .hw_fini = gfx_v11_0_hw_fini,
6086         .suspend = gfx_v11_0_suspend,
6087         .resume = gfx_v11_0_resume,
6088         .is_idle = gfx_v11_0_is_idle,
6089         .wait_for_idle = gfx_v11_0_wait_for_idle,
6090         .soft_reset = gfx_v11_0_soft_reset,
6091         .check_soft_reset = gfx_v11_0_check_soft_reset,
6092         .set_clockgating_state = gfx_v11_0_set_clockgating_state,
6093         .set_powergating_state = gfx_v11_0_set_powergating_state,
6094         .get_clockgating_state = gfx_v11_0_get_clockgating_state,
6095 };
6096
6097 static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_gfx = {
6098         .type = AMDGPU_RING_TYPE_GFX,
6099         .align_mask = 0xff,
6100         .nop = PACKET3(PACKET3_NOP, 0x3FFF),
6101         .support_64bit_ptrs = true,
6102         .secure_submission_supported = true,
6103         .vmhub = AMDGPU_GFXHUB_0,
6104         .get_rptr = gfx_v11_0_ring_get_rptr_gfx,
6105         .get_wptr = gfx_v11_0_ring_get_wptr_gfx,
6106         .set_wptr = gfx_v11_0_ring_set_wptr_gfx,
6107         .emit_frame_size = /* 242 dwords maximum in total if 16 IBs */
6108                 5 + /* COND_EXEC */
6109                 7 + /* PIPELINE_SYNC */
6110                 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
6111                 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
6112                 2 + /* VM_FLUSH */
6113                 8 + /* FENCE for VM_FLUSH */
6114                 20 + /* GDS switch */
6115                 5 + /* COND_EXEC */
6116                 7 + /* HDP_flush */
6117                 4 + /* VGT_flush */
6118                 31 + /* DE_META */
6119                 3 + /* CNTX_CTRL */
6120                 5 + /* HDP_INVL */
6121                 8 + 8 + /* FENCE x2 */
6122                 8, /* gfx_v11_0_emit_mem_sync */
6123         .emit_ib_size = 4, /* gfx_v11_0_ring_emit_ib_gfx */
6124         .emit_ib = gfx_v11_0_ring_emit_ib_gfx,
6125         .emit_fence = gfx_v11_0_ring_emit_fence,
6126         .emit_pipeline_sync = gfx_v11_0_ring_emit_pipeline_sync,
6127         .emit_vm_flush = gfx_v11_0_ring_emit_vm_flush,
6128         .emit_gds_switch = gfx_v11_0_ring_emit_gds_switch,
6129         .emit_hdp_flush = gfx_v11_0_ring_emit_hdp_flush,
6130         .test_ring = gfx_v11_0_ring_test_ring,
6131         .test_ib = gfx_v11_0_ring_test_ib,
6132         .insert_nop = amdgpu_ring_insert_nop,
6133         .pad_ib = amdgpu_ring_generic_pad_ib,
6134         .emit_cntxcntl = gfx_v11_0_ring_emit_cntxcntl,
6135         .init_cond_exec = gfx_v11_0_ring_emit_init_cond_exec,
6136         .patch_cond_exec = gfx_v11_0_ring_emit_patch_cond_exec,
6137         .preempt_ib = gfx_v11_0_ring_preempt_ib,
6138         .emit_frame_cntl = gfx_v11_0_ring_emit_frame_cntl,
6139         .emit_wreg = gfx_v11_0_ring_emit_wreg,
6140         .emit_reg_wait = gfx_v11_0_ring_emit_reg_wait,
6141         .emit_reg_write_reg_wait = gfx_v11_0_ring_emit_reg_write_reg_wait,
6142         .soft_recovery = gfx_v11_0_ring_soft_recovery,
6143         .emit_mem_sync = gfx_v11_0_emit_mem_sync,
6144 };
6145
6146 static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_compute = {
6147         .type = AMDGPU_RING_TYPE_COMPUTE,
6148         .align_mask = 0xff,
6149         .nop = PACKET3(PACKET3_NOP, 0x3FFF),
6150         .support_64bit_ptrs = true,
6151         .vmhub = AMDGPU_GFXHUB_0,
6152         .get_rptr = gfx_v11_0_ring_get_rptr_compute,
6153         .get_wptr = gfx_v11_0_ring_get_wptr_compute,
6154         .set_wptr = gfx_v11_0_ring_set_wptr_compute,
6155         .emit_frame_size =
6156                 20 + /* gfx_v11_0_ring_emit_gds_switch */
6157                 7 + /* gfx_v11_0_ring_emit_hdp_flush */
6158                 5 + /* hdp invalidate */
6159                 7 + /* gfx_v11_0_ring_emit_pipeline_sync */
6160                 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
6161                 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
6162                 2 + /* gfx_v11_0_ring_emit_vm_flush */
6163                 8 + 8 + 8 + /* gfx_v11_0_ring_emit_fence x3 for user fence, vm fence */
6164                 8, /* gfx_v11_0_emit_mem_sync */
6165         .emit_ib_size = 7, /* gfx_v11_0_ring_emit_ib_compute */
6166         .emit_ib = gfx_v11_0_ring_emit_ib_compute,
6167         .emit_fence = gfx_v11_0_ring_emit_fence,
6168         .emit_pipeline_sync = gfx_v11_0_ring_emit_pipeline_sync,
6169         .emit_vm_flush = gfx_v11_0_ring_emit_vm_flush,
6170         .emit_gds_switch = gfx_v11_0_ring_emit_gds_switch,
6171         .emit_hdp_flush = gfx_v11_0_ring_emit_hdp_flush,
6172         .test_ring = gfx_v11_0_ring_test_ring,
6173         .test_ib = gfx_v11_0_ring_test_ib,
6174         .insert_nop = amdgpu_ring_insert_nop,
6175         .pad_ib = amdgpu_ring_generic_pad_ib,
6176         .emit_wreg = gfx_v11_0_ring_emit_wreg,
6177         .emit_reg_wait = gfx_v11_0_ring_emit_reg_wait,
6178         .emit_reg_write_reg_wait = gfx_v11_0_ring_emit_reg_write_reg_wait,
6179         .emit_mem_sync = gfx_v11_0_emit_mem_sync,
6180 };
6181
6182 static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_kiq = {
6183         .type = AMDGPU_RING_TYPE_KIQ,
6184         .align_mask = 0xff,
6185         .nop = PACKET3(PACKET3_NOP, 0x3FFF),
6186         .support_64bit_ptrs = true,
6187         .vmhub = AMDGPU_GFXHUB_0,
6188         .get_rptr = gfx_v11_0_ring_get_rptr_compute,
6189         .get_wptr = gfx_v11_0_ring_get_wptr_compute,
6190         .set_wptr = gfx_v11_0_ring_set_wptr_compute,
6191         .emit_frame_size =
6192                 20 + /* gfx_v11_0_ring_emit_gds_switch */
6193                 7 + /* gfx_v11_0_ring_emit_hdp_flush */
6194                 5 + /* hdp invalidate */
6195                 7 + /* gfx_v11_0_ring_emit_pipeline_sync */
6196                 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
6197                 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
6198                 2 + /* gfx_v11_0_ring_emit_vm_flush */
6199                 8 + 8 + 8, /* gfx_v11_0_ring_emit_fence_kiq x3 for user fence, vm fence */
6200         .emit_ib_size = 7, /* gfx_v11_0_ring_emit_ib_compute */
6201         .emit_ib = gfx_v11_0_ring_emit_ib_compute,
6202         .emit_fence = gfx_v11_0_ring_emit_fence_kiq,
6203         .test_ring = gfx_v11_0_ring_test_ring,
6204         .test_ib = gfx_v11_0_ring_test_ib,
6205         .insert_nop = amdgpu_ring_insert_nop,
6206         .pad_ib = amdgpu_ring_generic_pad_ib,
6207         .emit_rreg = gfx_v11_0_ring_emit_rreg,
6208         .emit_wreg = gfx_v11_0_ring_emit_wreg,
6209         .emit_reg_wait = gfx_v11_0_ring_emit_reg_wait,
6210         .emit_reg_write_reg_wait = gfx_v11_0_ring_emit_reg_write_reg_wait,
6211 };
6212
6213 static void gfx_v11_0_set_ring_funcs(struct amdgpu_device *adev)
6214 {
6215         int i;
6216
6217         adev->gfx.kiq.ring.funcs = &gfx_v11_0_ring_funcs_kiq;
6218
6219         for (i = 0; i < adev->gfx.num_gfx_rings; i++)
6220                 adev->gfx.gfx_ring[i].funcs = &gfx_v11_0_ring_funcs_gfx;
6221
6222         for (i = 0; i < adev->gfx.num_compute_rings; i++)
6223                 adev->gfx.compute_ring[i].funcs = &gfx_v11_0_ring_funcs_compute;
6224 }
6225
6226 static const struct amdgpu_irq_src_funcs gfx_v11_0_eop_irq_funcs = {
6227         .set = gfx_v11_0_set_eop_interrupt_state,
6228         .process = gfx_v11_0_eop_irq,
6229 };
6230
6231 static const struct amdgpu_irq_src_funcs gfx_v11_0_priv_reg_irq_funcs = {
6232         .set = gfx_v11_0_set_priv_reg_fault_state,
6233         .process = gfx_v11_0_priv_reg_irq,
6234 };
6235
6236 static const struct amdgpu_irq_src_funcs gfx_v11_0_priv_inst_irq_funcs = {
6237         .set = gfx_v11_0_set_priv_inst_fault_state,
6238         .process = gfx_v11_0_priv_inst_irq,
6239 };
6240
6241 static void gfx_v11_0_set_irq_funcs(struct amdgpu_device *adev)
6242 {
6243         adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
6244         adev->gfx.eop_irq.funcs = &gfx_v11_0_eop_irq_funcs;
6245
6246         adev->gfx.priv_reg_irq.num_types = 1;
6247         adev->gfx.priv_reg_irq.funcs = &gfx_v11_0_priv_reg_irq_funcs;
6248
6249         adev->gfx.priv_inst_irq.num_types = 1;
6250         adev->gfx.priv_inst_irq.funcs = &gfx_v11_0_priv_inst_irq_funcs;
6251 }
6252
6253 static void gfx_v11_0_set_imu_funcs(struct amdgpu_device *adev)
6254 {
6255         if (adev->flags & AMD_IS_APU)
6256                 adev->gfx.imu.mode = MISSION_MODE;
6257         else
6258                 adev->gfx.imu.mode = DEBUG_MODE;
6259
6260         adev->gfx.imu.funcs = &gfx_v11_0_imu_funcs;
6261 }
6262
6263 static void gfx_v11_0_set_rlc_funcs(struct amdgpu_device *adev)
6264 {
6265         adev->gfx.rlc.funcs = &gfx_v11_0_rlc_funcs;
6266 }
6267
6268 static void gfx_v11_0_set_gds_init(struct amdgpu_device *adev)
6269 {
6270         unsigned total_cu = adev->gfx.config.max_cu_per_sh *
6271                             adev->gfx.config.max_sh_per_se *
6272                             adev->gfx.config.max_shader_engines;
6273
6274         adev->gds.gds_size = 0x1000;
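        /* max wave ID for GDS ordered append, presumably 32 waves per CU */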
6275         adev->gds.gds_compute_max_wave_id = total_cu * 32 - 1;
6276         adev->gds.gws_size = 64;
6277         adev->gds.oa_size = 16;
6278 }
6279
6280 static void gfx_v11_0_set_mqd_funcs(struct amdgpu_device *adev)
6281 {
6282         /* set gfx eng mqd */
6283         adev->mqds[AMDGPU_HW_IP_GFX].mqd_size =
6284                 sizeof(struct v11_gfx_mqd);
6285         adev->mqds[AMDGPU_HW_IP_GFX].init_mqd =
6286                 gfx_v11_0_gfx_mqd_init;
6287         /* set compute eng mqd */
6288         adev->mqds[AMDGPU_HW_IP_COMPUTE].mqd_size =
6289                 sizeof(struct v11_compute_mqd);
6290         adev->mqds[AMDGPU_HW_IP_COMPUTE].init_mqd =
6291                 gfx_v11_0_compute_mqd_init;
6292 }
6293
6294 static void gfx_v11_0_set_user_wgp_inactive_bitmap_per_sh(struct amdgpu_device *adev,
6295                                                           u32 bitmap)
6296 {
6297         u32 data;
6298
6299         if (!bitmap)
6300                 return;
6301
6302         data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_WGPS__SHIFT;
6303         data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_WGPS_MASK;
6304
6305         WREG32_SOC15(GC, 0, regGC_USER_SHADER_ARRAY_CONFIG, data);
6306 }
6307
6308 static u32 gfx_v11_0_get_wgp_active_bitmap_per_sh(struct amdgpu_device *adev)
6309 {
6310         u32 data, wgp_bitmask;
6311         data = RREG32_SOC15(GC, 0, regCC_GC_SHADER_ARRAY_CONFIG);
6312         data |= RREG32_SOC15(GC, 0, regGC_USER_SHADER_ARRAY_CONFIG);
6313
6314         data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_WGPS_MASK;
6315         data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_WGPS__SHIFT;
6316
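        /* each WGP packs two CUs, hence max_cu_per_sh / 2 WGPs per SA */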
6317         wgp_bitmask =
6318                 amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh >> 1);
6319
6320         return (~data) & wgp_bitmask;
6321 }
6322
6323 static u32 gfx_v11_0_get_cu_active_bitmap_per_sh(struct amdgpu_device *adev)
6324 {
6325         u32 wgp_idx, wgp_active_bitmap;
6326         u32 cu_bitmap_per_wgp, cu_active_bitmap;
6327
6328         wgp_active_bitmap = gfx_v11_0_get_wgp_active_bitmap_per_sh(adev);
6329         cu_active_bitmap = 0;
6330
6331         for (wgp_idx = 0; wgp_idx < 16; wgp_idx++) {
6332                 /* if there is one WGP enabled, it means 2 CUs will be enabled */
6333                 cu_bitmap_per_wgp = 3 << (2 * wgp_idx);
6334                 if (wgp_active_bitmap & (1 << wgp_idx))
6335                         cu_active_bitmap |= cu_bitmap_per_wgp;
6336         }
6337
6338         return cu_active_bitmap;
6339 }
6340
6341 static int gfx_v11_0_get_cu_info(struct amdgpu_device *adev,
6342                                  struct amdgpu_cu_info *cu_info)
6343 {
6344         int i, j, k, counter, active_cu_number = 0;
6345         u32 mask, bitmap;
6346         unsigned disable_masks[8 * 2];
6347
6348         if (!adev || !cu_info)
6349                 return -EINVAL;
6350
6351         amdgpu_gfx_parse_disable_cu(disable_masks, 8, 2);
6352
6353         mutex_lock(&adev->grbm_idx_mutex);
6354         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
6355                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
6356                         mask = 1;
6357                         counter = 0;
6358                         gfx_v11_0_select_se_sh(adev, i, j, 0xffffffff);
6359                         if (i < 8 && j < 2)
6360                                 gfx_v11_0_set_user_wgp_inactive_bitmap_per_sh(
6361                                         adev, disable_masks[i * 2 + j]);
6362                         bitmap = gfx_v11_0_get_cu_active_bitmap_per_sh(adev);
6363
6364                         /*
6365                          * GFX11 could support more than 4 SEs, while the bitmap
6366                          * in the cu_info struct is 4x4, and the ioctl interface struct
6367                          * drm_amdgpu_info_device must remain stable.
6368                          * So we use the last two columns of the bitmap to store the CU
6369                          * mask for SEs 4 to 7; the layout of the bitmap is as below:
6370                          *    SE0: {SH0,SH1} --> {bitmap[0][0], bitmap[0][1]}
6371                          *    SE1: {SH0,SH1} --> {bitmap[1][0], bitmap[1][1]}
6372                          *    SE2: {SH0,SH1} --> {bitmap[2][0], bitmap[2][1]}
6373                          *    SE3: {SH0,SH1} --> {bitmap[3][0], bitmap[3][1]}
6374                          *    SE4: {SH0,SH1} --> {bitmap[0][2], bitmap[0][3]}
6375                          *    SE5: {SH0,SH1} --> {bitmap[1][2], bitmap[1][3]}
6376                          *    SE6: {SH0,SH1} --> {bitmap[2][2], bitmap[2][3]}
6377                          *    SE7: {SH0,SH1} --> {bitmap[3][2], bitmap[3][3]}
6378                          */
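                        /*
                         * e.g. SE5/SH1 (i = 5, j = 1) lands in bitmap[1][3]
                         * via bitmap[i % 4][j + (i / 4) * 2].
                         */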
6379                         cu_info->bitmap[i % 4][j + (i / 4) * 2] = bitmap;
6380
6381                         for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
6382                                 if (bitmap & mask)
6383                                         counter++;
6384
6385                                 mask <<= 1;
6386                         }
6387                         active_cu_number += counter;
6388                 }
6389         }
6390         gfx_v11_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
6391         mutex_unlock(&adev->grbm_idx_mutex);
6392
6393         cu_info->number = active_cu_number;
6394         cu_info->simd_per_cu = NUM_SIMD_PER_CU;
6395
6396         return 0;
6397 }
6398
6399 const struct amdgpu_ip_block_version gfx_v11_0_ip_block =
6400 {
6401         .type = AMD_IP_BLOCK_TYPE_GFX,
6402         .major = 11,
6403         .minor = 0,
6404         .rev = 0,
6405         .funcs = &gfx_v11_0_ip_funcs,
6406 };