drm/amdgpu: Add RLCG interface driver implementation for gfx v9.4.3 (v3)
author: Victor Lu <victorchengchi.lu@amd.com>
Fri, 16 Jun 2023 15:01:59 +0000 (11:01 -0400)
committer: Alex Deucher <alexander.deucher@amd.com>
Tue, 18 Jul 2023 15:16:41 +0000 (11:16 -0400)
Add RLCG interface support for gfx v9.4.3 and multiple XCCs.
Do not enable it yet.

v2: Fix amdgpu_rlcg_reg_access_ctrl init, add support for multiple XCCs
    in amdgpu_mm_wreg_mmio_rlc

v3: Use GET_INST() when indexing amdgpu_rlcg_reg_access_ctrl

Signed-off-by: Victor Lu <victorchengchi.lu@amd.com>
Reviewed-by: Zhigang Luo <zhigang.luo@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
drivers/gpu/drm/amd/amdgpu/amdgpu.h
drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h
drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
drivers/gpu/drm/amd/amdgpu/soc15_common.h

index 405f93b639d167d9f3a49d105f8351aa992bfc77..43270613bbcbc8f1f6a5e620ea02ad791934b596 100644 (file)
@@ -1128,7 +1128,7 @@ void amdgpu_device_wreg(struct amdgpu_device *adev,
 void amdgpu_device_indirect_wreg_ext(struct amdgpu_device *adev,
                                     u64 reg_addr, u32 reg_data);
 void amdgpu_mm_wreg_mmio_rlc(struct amdgpu_device *adev,
-                            uint32_t reg, uint32_t v);
+                            uint32_t reg, uint32_t v, uint32_t xcc_id);
 void amdgpu_mm_wreg8(struct amdgpu_device *adev, uint32_t offset, uint8_t value);
 uint8_t amdgpu_mm_rreg8(struct amdgpu_device *adev, uint32_t offset);
 
index 56e89e76ff179a6eaa83c412e270328c0e143fb1..00ab0b3c82771ca910e832e14aec9f0032c56e8a 100644 (file)
@@ -154,7 +154,7 @@ static int  amdgpu_debugfs_process_reg_op(bool read, struct file *f,
                } else {
                        r = get_user(value, (uint32_t *)buf);
                        if (!r)
-                               amdgpu_mm_wreg_mmio_rlc(adev, *pos >> 2, value);
+                               amdgpu_mm_wreg_mmio_rlc(adev, *pos >> 2, value, 0);
                }
                if (r) {
                        result = r;
@@ -283,7 +283,7 @@ static ssize_t amdgpu_debugfs_regs2_op(struct file *f, char __user *buf, u32 off
                } else {
                        r = get_user(value, (uint32_t *)buf);
                        if (!r)
-                               amdgpu_mm_wreg_mmio_rlc(adev, offset >> 2, value);
+                               amdgpu_mm_wreg_mmio_rlc(adev, offset >> 2, value, rd->id.xcc_id);
                }
                if (r) {
                        result = r;
index b593ebb1d67702b50c56fe2a98bb42e45e0eb742..28e6419023b7bbe86380a28cc2cb432c4fdf09d5 100644 (file)
@@ -571,7 +571,8 @@ void amdgpu_device_wreg(struct amdgpu_device *adev,
  * this function is invoked only for the debugfs register access
  */
 void amdgpu_mm_wreg_mmio_rlc(struct amdgpu_device *adev,
-                            uint32_t reg, uint32_t v)
+                            uint32_t reg, uint32_t v,
+                            uint32_t xcc_id)
 {
        if (amdgpu_device_skip_hw_access(adev))
                return;
@@ -580,7 +581,7 @@ void amdgpu_mm_wreg_mmio_rlc(struct amdgpu_device *adev,
            adev->gfx.rlc.funcs &&
            adev->gfx.rlc.funcs->is_rlcg_access_range) {
                if (adev->gfx.rlc.funcs->is_rlcg_access_range(adev, reg))
-                       return amdgpu_sriov_wreg(adev, reg, v, 0, 0);
+                       return amdgpu_sriov_wreg(adev, reg, v, 0, 0, xcc_id);
        } else if ((reg * 4) >= adev->rmmio_size) {
                adev->pcie_wreg(adev, reg * 4, v);
        } else {
index 80b263646966eef671ce23ee600e079350ebee82..b591d33af26452aa58066cd0ea346ef3526305e5 100644 (file)
@@ -26,6 +26,8 @@
 
 #include "clearstate_defs.h"
 
+#define AMDGPU_MAX_RLC_INSTANCES       8
+
 /* firmware ID used in rlc toc */
 typedef enum _FIRMWARE_ID_ {
        FIRMWARE_ID_INVALID                                     = 0,
@@ -201,7 +203,7 @@ struct amdgpu_rlc {
        u32                     cp_table_size;
 
        /* safe mode for updating CG/PG state */
-       bool in_safe_mode[8];
+       bool in_safe_mode[AMDGPU_MAX_RLC_INSTANCES];
        const struct amdgpu_rlc_funcs *funcs;
 
        /* for firmware data */
@@ -257,7 +259,7 @@ struct amdgpu_rlc {
 
        bool rlcg_reg_access_supported;
        /* registers for rlcg indirect reg access */
-       struct amdgpu_rlcg_reg_access_ctrl reg_access_ctrl;
+       struct amdgpu_rlcg_reg_access_ctrl reg_access_ctrl[AMDGPU_MAX_RLC_INSTANCES];
 };
 
 void amdgpu_gfx_rlc_enter_safe_mode(struct amdgpu_device *adev, int xcc_id);
index 41aa853a07d24d3bada2f7e1f6a44c6f3981f473..770eb9725ec1302bdbad836c744169e430449eff 100644 (file)
@@ -954,7 +954,7 @@ static bool amdgpu_virt_get_rlcg_reg_access_flag(struct amdgpu_device *adev,
        return ret;
 }
 
-static u32 amdgpu_virt_rlcg_reg_rw(struct amdgpu_device *adev, u32 offset, u32 v, u32 flag)
+static u32 amdgpu_virt_rlcg_reg_rw(struct amdgpu_device *adev, u32 offset, u32 v, u32 flag, u32 xcc_id)
 {
        struct amdgpu_rlcg_reg_access_ctrl *reg_access_ctrl;
        uint32_t timeout = 50000;
@@ -972,7 +972,12 @@ static u32 amdgpu_virt_rlcg_reg_rw(struct amdgpu_device *adev, u32 offset, u32 v
                return 0;
        }
 
-       reg_access_ctrl = &adev->gfx.rlc.reg_access_ctrl;
+       if (adev->gfx.xcc_mask && (((1 << xcc_id) & adev->gfx.xcc_mask) == 0)) {
+               dev_err(adev->dev, "invalid xcc\n");
+               return 0;
+       }
+
+       reg_access_ctrl = &adev->gfx.rlc.reg_access_ctrl[xcc_id];
        scratch_reg0 = (void __iomem *)adev->rmmio + 4 * reg_access_ctrl->scratch_reg0;
        scratch_reg1 = (void __iomem *)adev->rmmio + 4 * reg_access_ctrl->scratch_reg1;
        scratch_reg2 = (void __iomem *)adev->rmmio + 4 * reg_access_ctrl->scratch_reg2;
@@ -1037,13 +1042,13 @@ static u32 amdgpu_virt_rlcg_reg_rw(struct amdgpu_device *adev, u32 offset, u32 v
 
 void amdgpu_sriov_wreg(struct amdgpu_device *adev,
                       u32 offset, u32 value,
-                      u32 acc_flags, u32 hwip)
+                      u32 acc_flags, u32 hwip, u32 xcc_id)
 {
        u32 rlcg_flag;
 
        if (!amdgpu_sriov_runtime(adev) &&
                amdgpu_virt_get_rlcg_reg_access_flag(adev, acc_flags, hwip, true, &rlcg_flag)) {
-               amdgpu_virt_rlcg_reg_rw(adev, offset, value, rlcg_flag);
+               amdgpu_virt_rlcg_reg_rw(adev, offset, value, rlcg_flag, xcc_id);
                return;
        }
 
@@ -1054,13 +1059,13 @@ void amdgpu_sriov_wreg(struct amdgpu_device *adev,
 }
 
 u32 amdgpu_sriov_rreg(struct amdgpu_device *adev,
-                     u32 offset, u32 acc_flags, u32 hwip)
+                     u32 offset, u32 acc_flags, u32 hwip, u32 xcc_id)
 {
        u32 rlcg_flag;
 
        if (!amdgpu_sriov_runtime(adev) &&
                amdgpu_virt_get_rlcg_reg_access_flag(adev, acc_flags, hwip, false, &rlcg_flag))
-               return amdgpu_virt_rlcg_reg_rw(adev, offset, 0, rlcg_flag);
+               return amdgpu_virt_rlcg_reg_rw(adev, offset, 0, rlcg_flag, xcc_id);
 
        if (acc_flags & AMDGPU_REGS_NO_KIQ)
                return RREG32_NO_KIQ(offset);
index 4f7bab52282ac1b6463fb2420315bbdf4b92b0ba..d1f7509a44cb79dd65d251ffa4f480e64681c4a0 100644 (file)
@@ -355,9 +355,9 @@ void amdgpu_virt_update_sriov_video_codec(struct amdgpu_device *adev,
                        struct amdgpu_video_codec_info *decode, uint32_t decode_array_size);
 void amdgpu_sriov_wreg(struct amdgpu_device *adev,
                       u32 offset, u32 value,
-                      u32 acc_flags, u32 hwip);
+                      u32 acc_flags, u32 hwip, u32 xcc_id);
 u32 amdgpu_sriov_rreg(struct amdgpu_device *adev,
-                     u32 offset, u32 acc_flags, u32 hwip);
+                     u32 offset, u32 acc_flags, u32 hwip, u32 xcc_id);
 bool amdgpu_virt_fw_load_skip_check(struct amdgpu_device *adev,
                        uint32_t ucode_id);
 #endif
index 1d671c330475219fcaa6e99ba2937ec56ab87748..0aee9c8288a2b8eb88054177b52e8a8ce0404215 100644 (file)
@@ -4137,7 +4137,7 @@ static void gfx_v10_0_init_rlcg_reg_access_ctrl(struct amdgpu_device *adev)
 {
        struct amdgpu_rlcg_reg_access_ctrl *reg_access_ctrl;
 
-       reg_access_ctrl = &adev->gfx.rlc.reg_access_ctrl;
+       reg_access_ctrl = &adev->gfx.rlc.reg_access_ctrl[0];
        reg_access_ctrl->scratch_reg0 = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0);
        reg_access_ctrl->scratch_reg1 = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG1);
        reg_access_ctrl->scratch_reg2 = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG2);
index 3a7af59e83ca11ea1409e8903899d302b5b3187c..4d53d6f13c3b78ce18083a031717ca47739d2b6e 100644 (file)
@@ -663,7 +663,7 @@ static void gfx_v11_0_init_rlcg_reg_access_ctrl(struct amdgpu_device *adev)
 {
        struct amdgpu_rlcg_reg_access_ctrl *reg_access_ctrl;
 
-       reg_access_ctrl = &adev->gfx.rlc.reg_access_ctrl;
+       reg_access_ctrl = &adev->gfx.rlc.reg_access_ctrl[0];
        reg_access_ctrl->scratch_reg0 = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG0);
        reg_access_ctrl->scratch_reg1 = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG1);
        reg_access_ctrl->scratch_reg2 = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG2);
index fc179e5f8dc194e514f9e26a794bff3601f63342..0a74116b3142f145822b12a0b808840895b2e0fb 100644 (file)
@@ -1634,7 +1634,7 @@ static void gfx_v9_0_init_rlcg_reg_access_ctrl(struct amdgpu_device *adev)
 {
        struct amdgpu_rlcg_reg_access_ctrl *reg_access_ctrl;
 
-       reg_access_ctrl = &adev->gfx.rlc.reg_access_ctrl;
+       reg_access_ctrl = &adev->gfx.rlc.reg_access_ctrl[0];
        reg_access_ctrl->scratch_reg0 = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0);
        reg_access_ctrl->scratch_reg1 = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG1);
        reg_access_ctrl->scratch_reg2 = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG2);
index a20b776839248c44514f9a37322d331aeff05525..b594d5c05149654000a5b7d1206e576a0bd56763 100644 (file)
@@ -1072,6 +1072,25 @@ static void gfx_v9_4_3_xcc_unset_safe_mode(struct amdgpu_device *adev,
        WREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_SAFE_MODE, data);
 }
 
+static void gfx_v9_4_3_init_rlcg_reg_access_ctrl(struct amdgpu_device *adev)
+{
+       int xcc_id;
+       struct amdgpu_rlcg_reg_access_ctrl *reg_access_ctrl;
+
+       for (xcc_id = 0; xcc_id < AMDGPU_MAX_RLC_INSTANCES; xcc_id++) {
+               if (((1 << xcc_id) & adev->gfx.xcc_mask) == 0)
+                       continue;
+               reg_access_ctrl = &adev->gfx.rlc.reg_access_ctrl[GET_INST(GC, xcc_id)];
+               reg_access_ctrl->scratch_reg0 = SOC15_REG_OFFSET(GC, GET_INST(GC, xcc_id), regSCRATCH_REG0);
+               reg_access_ctrl->scratch_reg1 = SOC15_REG_OFFSET(GC, GET_INST(GC, xcc_id), regSCRATCH_REG1);
+               reg_access_ctrl->scratch_reg2 = SOC15_REG_OFFSET(GC, GET_INST(GC, xcc_id), regSCRATCH_REG2);
+               reg_access_ctrl->scratch_reg3 = SOC15_REG_OFFSET(GC, GET_INST(GC, xcc_id), regSCRATCH_REG3);
+               reg_access_ctrl->grbm_cntl = SOC15_REG_OFFSET(GC, GET_INST(GC, xcc_id), regGRBM_GFX_CNTL);
+               reg_access_ctrl->grbm_idx = SOC15_REG_OFFSET(GC, GET_INST(GC, xcc_id), regGRBM_GFX_INDEX);
+               reg_access_ctrl->spare_int = SOC15_REG_OFFSET(GC, GET_INST(GC, xcc_id), regRLC_SPARE_INT);
+       }
+}
+
 static int gfx_v9_4_3_rlc_init(struct amdgpu_device *adev)
 {
        /* init spm vmid with 0xf */
@@ -2149,6 +2168,9 @@ static int gfx_v9_4_3_early_init(void *handle)
        gfx_v9_4_3_set_gds_init(adev);
        gfx_v9_4_3_set_rlc_funcs(adev);
 
+       /* init rlcg reg access ctrl */
+       gfx_v9_4_3_init_rlcg_reg_access_ctrl(adev);
+
        return gfx_v9_4_3_init_microcode(adev);
 }
 
index 96948a59f8dd556455d1f6ab0bd484285852b6e7..da683afa0222f188dda74788863fd82ec5a35bcd 100644 (file)
 #define SOC15_REG_OFFSET1(ip, inst, reg, offset) \
        (adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + (reg)+(offset))
 
-#define __WREG32_SOC15_RLC__(reg, value, flag, hwip) \
+#define __WREG32_SOC15_RLC__(reg, value, flag, hwip, inst) \
        ((amdgpu_sriov_vf(adev) && adev->gfx.rlc.funcs && adev->gfx.rlc.rlcg_reg_access_supported) ? \
-        amdgpu_sriov_wreg(adev, reg, value, flag, hwip) : \
+        amdgpu_sriov_wreg(adev, reg, value, flag, hwip, inst) : \
         WREG32(reg, value))
 
-#define __RREG32_SOC15_RLC__(reg, flag, hwip) \
+#define __RREG32_SOC15_RLC__(reg, flag, hwip, inst) \
        ((amdgpu_sriov_vf(adev) && adev->gfx.rlc.funcs && adev->gfx.rlc.rlcg_reg_access_supported) ? \
-        amdgpu_sriov_rreg(adev, reg, flag, hwip) : \
+        amdgpu_sriov_rreg(adev, reg, flag, hwip, inst) : \
         RREG32(reg))
 
 #define WREG32_FIELD15(ip, idx, reg, field, val)       \
         __WREG32_SOC15_RLC__(adev->reg_offset[ip##_HWIP][idx][mm##reg##_BASE_IDX] + mm##reg,   \
                                (__RREG32_SOC15_RLC__( \
                                        adev->reg_offset[ip##_HWIP][idx][mm##reg##_BASE_IDX] + mm##reg, \
-                                       0, ip##_HWIP) & \
+                                       0, ip##_HWIP, idx) & \
                                ~REG_FIELD_MASK(reg, field)) | (val) << REG_FIELD_SHIFT(reg, field), \
-                             0, ip##_HWIP)
+                             0, ip##_HWIP, idx)
 
 #define WREG32_FIELD15_PREREG(ip, idx, reg_name, field, val)        \
        __WREG32_SOC15_RLC__(adev->reg_offset[ip##_HWIP][idx][reg##reg_name##_BASE_IDX] + reg##reg_name,   \
                        (__RREG32_SOC15_RLC__( \
                                        adev->reg_offset[ip##_HWIP][idx][reg##reg_name##_BASE_IDX] + reg##reg_name, \
-                                       0, ip##_HWIP) & \
+                                       0, ip##_HWIP, idx) & \
                                        ~REG_FIELD_MASK(reg_name, field)) | (val) << REG_FIELD_SHIFT(reg_name, field), \
-                       0, ip##_HWIP)
+                       0, ip##_HWIP, idx)
 
 #define RREG32_SOC15(ip, inst, reg) \
        __RREG32_SOC15_RLC__(adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg, \
-                        0, ip##_HWIP)
+                        0, ip##_HWIP, inst)
 
-#define RREG32_SOC15_IP(ip, reg) __RREG32_SOC15_RLC__(reg, 0, ip##_HWIP)
+#define RREG32_SOC15_IP(ip, reg) __RREG32_SOC15_RLC__(reg, 0, ip##_HWIP, 0)
 
-#define RREG32_SOC15_IP_NO_KIQ(ip, reg) __RREG32_SOC15_RLC__(reg, AMDGPU_REGS_NO_KIQ, ip##_HWIP)
+#define RREG32_SOC15_IP_NO_KIQ(ip, reg) __RREG32_SOC15_RLC__(reg, AMDGPU_REGS_NO_KIQ, ip##_HWIP, 0)
 
 #define RREG32_SOC15_NO_KIQ(ip, inst, reg) \
        __RREG32_SOC15_RLC__(adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg, \
-                        AMDGPU_REGS_NO_KIQ, ip##_HWIP)
+                        AMDGPU_REGS_NO_KIQ, ip##_HWIP, inst)
 
 #define RREG32_SOC15_OFFSET(ip, inst, reg, offset) \
         __RREG32_SOC15_RLC__((adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + (reg)) + \
-                        (offset), 0, ip##_HWIP)
+                        (offset), 0, ip##_HWIP, inst)
 
 #define WREG32_SOC15(ip, inst, reg, value) \
         __WREG32_SOC15_RLC__((adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg), \
-                         value, 0, ip##_HWIP)
+                         value, 0, ip##_HWIP, inst)
 
 #define WREG32_SOC15_IP(ip, reg, value) \
-        __WREG32_SOC15_RLC__(reg, value, 0, ip##_HWIP)
+        __WREG32_SOC15_RLC__(reg, value, 0, ip##_HWIP, 0)
 
 #define WREG32_SOC15_IP_NO_KIQ(ip, reg, value) \
-        __WREG32_SOC15_RLC__(reg, value, AMDGPU_REGS_NO_KIQ, ip##_HWIP)
+        __WREG32_SOC15_RLC__(reg, value, AMDGPU_REGS_NO_KIQ, ip##_HWIP, 0)
 
 #define WREG32_SOC15_NO_KIQ(ip, inst, reg, value) \
        __WREG32_SOC15_RLC__(adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg, \
-                            value, AMDGPU_REGS_NO_KIQ, ip##_HWIP)
+                            value, AMDGPU_REGS_NO_KIQ, ip##_HWIP, inst)
 
 #define WREG32_SOC15_OFFSET(ip, inst, reg, offset, value) \
         __WREG32_SOC15_RLC__((adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg) + offset, \
-                         value, 0, ip##_HWIP)
+                         value, 0, ip##_HWIP, inst)
 
 #define SOC15_WAIT_ON_RREG(ip, inst, reg, expected_value, mask)      \
        amdgpu_device_wait_on_rreg(adev, inst,                       \
        #reg, expected_value, mask)
 
 #define WREG32_RLC(reg, value) \
-       __WREG32_SOC15_RLC__(reg, value, AMDGPU_REGS_RLC, GC_HWIP)
+       __WREG32_SOC15_RLC__(reg, value, AMDGPU_REGS_RLC, GC_HWIP, 0)
 
-#define WREG32_RLC_EX(prefix, reg, value) \
+#define WREG32_RLC_EX(prefix, reg, value, inst) \
        do {                                                    \
                if (amdgpu_sriov_fullaccess(adev)) {    \
                        uint32_t i = 0; \
                        uint32_t retries = 50000;       \
-                       uint32_t r0 = adev->reg_offset[GC_HWIP][0][prefix##SCRATCH_REG0_BASE_IDX] + prefix##SCRATCH_REG0;       \
-                       uint32_t r1 = adev->reg_offset[GC_HWIP][0][prefix##SCRATCH_REG1_BASE_IDX] + prefix##SCRATCH_REG1;       \
-                       uint32_t spare_int = adev->reg_offset[GC_HWIP][0][prefix##RLC_SPARE_INT_BASE_IDX] + prefix##RLC_SPARE_INT;      \
+                       uint32_t r0 = adev->reg_offset[GC_HWIP][inst][prefix##SCRATCH_REG0_BASE_IDX] + prefix##SCRATCH_REG0;    \
+                       uint32_t r1 = adev->reg_offset[GC_HWIP][inst][prefix##SCRATCH_REG1_BASE_IDX] + prefix##SCRATCH_REG1;    \
+                       uint32_t spare_int = adev->reg_offset[GC_HWIP][inst][prefix##RLC_SPARE_INT_BASE_IDX] + prefix##RLC_SPARE_INT;   \
                        WREG32(r0, value);      \
                        WREG32(r1, (reg | 0x80000000)); \
                        WREG32(spare_int, 0x1); \
 
 /* shadow the registers in the callback function */
 #define WREG32_SOC15_RLC_SHADOW(ip, inst, reg, value) \
-       __WREG32_SOC15_RLC__((adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg), value, AMDGPU_REGS_RLC, GC_HWIP)
+       __WREG32_SOC15_RLC__((adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg), value, AMDGPU_REGS_RLC, GC_HWIP, inst)
 
 /* for GC only */
 #define RREG32_RLC(reg) \
        __RREG32_SOC15_RLC__(reg, AMDGPU_REGS_RLC, GC_HWIP)
 
 #define WREG32_RLC_NO_KIQ(reg, value, hwip) \
-       __WREG32_SOC15_RLC__(reg, value, AMDGPU_REGS_NO_KIQ | AMDGPU_REGS_RLC, hwip)
+       __WREG32_SOC15_RLC__(reg, value, AMDGPU_REGS_NO_KIQ | AMDGPU_REGS_RLC, hwip, 0)
 
 #define RREG32_RLC_NO_KIQ(reg, hwip) \
-       __RREG32_SOC15_RLC__(reg, AMDGPU_REGS_NO_KIQ | AMDGPU_REGS_RLC, hwip)
+       __RREG32_SOC15_RLC__(reg, AMDGPU_REGS_NO_KIQ | AMDGPU_REGS_RLC, hwip, 0)
 
 #define WREG32_SOC15_RLC_SHADOW_EX(prefix, ip, inst, reg, value) \
        do {                                                    \
        } while (0)
 
 #define RREG32_SOC15_RLC(ip, inst, reg) \
-       __RREG32_SOC15_RLC__(adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg, AMDGPU_REGS_RLC, ip##_HWIP)
+       __RREG32_SOC15_RLC__(adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg, AMDGPU_REGS_RLC, ip##_HWIP, inst)
 
 #define WREG32_SOC15_RLC(ip, inst, reg, value) \
        do {                                                    \
                uint32_t target_reg = adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg;\
-               __WREG32_SOC15_RLC__(target_reg, value, AMDGPU_REGS_RLC, ip##_HWIP); \
+               __WREG32_SOC15_RLC__(target_reg, value, AMDGPU_REGS_RLC, ip##_HWIP, inst); \
        } while (0)
 
 #define WREG32_SOC15_RLC_EX(prefix, ip, inst, reg, value) \
        do {                                                    \
                        uint32_t target_reg = adev->reg_offset[GC_HWIP][inst][reg##_BASE_IDX] + reg;\
-                       WREG32_RLC_EX(prefix, target_reg, value); \
+                       WREG32_RLC_EX(prefix, target_reg, value, inst); \
        } while (0)
 
 #define WREG32_FIELD15_RLC(ip, idx, reg, field, val)   \
        __WREG32_SOC15_RLC__((adev->reg_offset[ip##_HWIP][idx][mm##reg##_BASE_IDX] + mm##reg), \
                             (__RREG32_SOC15_RLC__(adev->reg_offset[ip##_HWIP][idx][mm##reg##_BASE_IDX] + mm##reg, \
-                                                  AMDGPU_REGS_RLC, ip##_HWIP) & \
+                                                  AMDGPU_REGS_RLC, ip##_HWIP, idx) & \
                              ~REG_FIELD_MASK(reg, field)) | (val) << REG_FIELD_SHIFT(reg, field), \
-                            AMDGPU_REGS_RLC, ip##_HWIP)
+                            AMDGPU_REGS_RLC, ip##_HWIP, idx)
 
 #define WREG32_SOC15_OFFSET_RLC(ip, inst, reg, offset, value) \
-       __WREG32_SOC15_RLC__((adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg) + offset, value, AMDGPU_REGS_RLC, ip##_HWIP)
+       __WREG32_SOC15_RLC__((adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg) + offset, value, AMDGPU_REGS_RLC, ip##_HWIP, inst)
 
 #define RREG32_SOC15_OFFSET_RLC(ip, inst, reg, offset) \
-       __RREG32_SOC15_RLC__((adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg) + offset, AMDGPU_REGS_RLC, ip##_HWIP)
+       __RREG32_SOC15_RLC__((adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg) + offset, AMDGPU_REGS_RLC, ip##_HWIP, inst)
 
 /* inst equals to ext for some IPs */
 #define RREG32_SOC15_EXT(ip, inst, reg, ext) \