drm/amdgpu: add ras_controller and err_event_athub interrupt support
author Hawking Zhang <Hawking.Zhang@amd.com>
Wed, 5 Jun 2019 06:57:00 +0000 (14:57 +0800)
committer Alex Deucher <alexander.deucher@amd.com>
Fri, 13 Sep 2019 22:11:04 +0000 (17:11 -0500)
The RAS controller interrupt and the RAS err_event_athub interrupt are two
dedicated interrupts for RAS support.

Signed-off-by: Hawking Zhang <Hawking.Zhang@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h
drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c

index 28417e485c58d05bfc505343de78ed8e10ba2957..a04c5ea0341867388906bdfd54831216a80788e7 100644 (file)
@@ -79,10 +79,14 @@ struct amdgpu_nbio_funcs {
        void (*remap_hdp_registers)(struct amdgpu_device *adev);
        void (*handle_ras_controller_intr_no_bifring)(struct amdgpu_device *adev);
        void (*handle_ras_err_event_athub_intr_no_bifring)(struct amdgpu_device *adev);
+       int (*init_ras_controller_interrupt)(struct amdgpu_device *adev);
+       int (*init_ras_err_event_athub_interrupt)(struct amdgpu_device *adev);
 };
 
 struct amdgpu_nbio {
        const struct nbio_hdp_flush_reg *hdp_flush_reg;
+       struct amdgpu_irq_src ras_controller_irq; /* irq source for the ras controller interrupt */
+       struct amdgpu_irq_src ras_err_event_athub_irq; /* irq source for the ras err event athub interrupt */
        const struct amdgpu_nbio_funcs *funcs;
 };
 
index 016ea274b955cac5ba37418701084faa0c7e41cd..f7180109bef405a7249280c6f38a8dc95338fc48 100644 (file)
@@ -29,6 +29,7 @@
 #include "amdgpu.h"
 #include "amdgpu_ras.h"
 #include "amdgpu_atomfirmware.h"
+#include "ivsrcid/nbio/irqsrcs_nbif_7_4.h"
 
 const char *ras_error_string[] = {
        "none",
@@ -1500,6 +1501,7 @@ static void amdgpu_ras_check_supported(struct amdgpu_device *adev,
 int amdgpu_ras_init(struct amdgpu_device *adev)
 {
        struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+       int r;
 
        if (con)
                return 0;
@@ -1527,6 +1529,18 @@ int amdgpu_ras_init(struct amdgpu_device *adev)
        /* Might need get this flag from vbios. */
        con->flags = RAS_DEFAULT_FLAGS;
 
+       if (adev->nbio.funcs->init_ras_controller_interrupt) {
+               r = adev->nbio.funcs->init_ras_controller_interrupt(adev);
+               if (r)
+                       return r;
+       }
+
+       if (adev->nbio.funcs->init_ras_err_event_athub_interrupt) {
+               r = adev->nbio.funcs->init_ras_err_event_athub_interrupt(adev);
+               if (r)
+                       return r;
+       }
+
        if (amdgpu_ras_recovery_init(adev))
                goto recovery_out;
 
index 6ecdd5e3ca3fb392fde329183106d830c3ea5011..faf9300630a5b2dd2c88f4eeba2ac3779b5c11fc 100644 (file)
@@ -27,6 +27,7 @@
 #include "nbio/nbio_7_4_offset.h"
 #include "nbio/nbio_7_4_sh_mask.h"
 #include "nbio/nbio_7_4_0_smn.h"
+#include "ivsrcid/nbio/irqsrcs_nbif_7_4.h"
 #include <uapi/linux/kfd_ioctl.h>
 
 #define smnNBIF_MGCG_CTRL_LCLK 0x1013a21c
@@ -345,6 +346,128 @@ static void nbio_v7_4_handle_ras_err_event_athub_intr_no_bifring(struct amdgpu_d
        }
 }
 
+
+static int nbio_v7_4_set_ras_controller_irq_state(struct amdgpu_device *adev,
+                                                 struct amdgpu_irq_src *src,
+                                                 unsigned type,
+                                                 enum amdgpu_interrupt_state state)
+{
+       /* Enabling the ras controller interrupt itself should be done in
+        * psp bl when it tries to enable the ras feature. All the driver
+        * has to do here is set the correct interrupt vector for the
+        * bare-metal and sriov use case respectively.
+        */
+       uint32_t val = RREG32_SOC15(NBIO, 0, mmBIF_INTR_CNTL);
+
+       /* the register is only written when the interrupt gets enabled */
+       if (state != AMDGPU_IRQ_STATE_ENABLE)
+               return 0;
+
+       /* bare metal case: setting the interrupt vector select bit
+        * to 0 selects vector 1 */
+       val = REG_SET_FIELD(val, BIF_INTR_CNTL, RAS_INTR_VEC_SEL, 0);
+       WREG32_SOC15(NBIO, 0, mmBIF_INTR_CNTL, val);
+
+       return 0;
+}
+
+static int nbio_v7_4_process_ras_controller_irq(struct amdgpu_device *adev,
+                                               struct amdgpu_irq_src *source,
+                                               struct amdgpu_iv_entry *entry)
+{
+       /* By design, the ih cookie for ras_controller_irq should be written to
+        * the BIF ring instead of the general iv ring. However, due to a known
+        * bif ring hw bug, that ring has to be disabled, so this function is not
+        * expected to be invoked. It is a dummy that always returns 0.
+        */
+       return 0;
+}
+
+static int nbio_v7_4_set_ras_err_event_athub_irq_state(struct amdgpu_device *adev,
+                                                      struct amdgpu_irq_src *src,
+                                                      unsigned type,
+                                                      enum amdgpu_interrupt_state state)
+{
+       /* Driver only needs to set the correct interrupt vector for the
+        * bare-metal and sriov use case respectively.
+        * NOTE(review): enablement of ras_controller_irq is said to be
+        * done in psp bl; presumably the same holds for
+        * err_event_athub_irq -- confirm.
+        */
+       uint32_t intr_cntl = RREG32_SOC15(NBIO, 0, mmBIF_INTR_CNTL);
+
+       if (state != AMDGPU_IRQ_STATE_ENABLE)
+               return 0;
+
+       /* vector select bit 0 selects vector 1 (bare metal case) */
+       intr_cntl = REG_SET_FIELD(intr_cntl, BIF_INTR_CNTL,
+                                 RAS_INTR_VEC_SEL, 0);
+       WREG32_SOC15(NBIO, 0, mmBIF_INTR_CNTL, intr_cntl);
+
+       return 0;
+}
+
+static int nbio_v7_4_process_err_event_athub_irq(struct amdgpu_device *adev,
+                                                struct amdgpu_irq_src *source,
+                                                struct amdgpu_iv_entry *entry)
+{
+       /* By design, the ih cookie for err_event_athub_irq should be written to
+        * the BIF ring instead of the general iv ring. However, due to a known
+        * bif ring hw bug, that ring has to be disabled, so this function is not
+        * expected to be invoked. It is a dummy that always returns 0.
+        */
+       return 0;
+}
+
+static const struct amdgpu_irq_src_funcs nbio_v7_4_ras_controller_irq_funcs = {
+       .process = nbio_v7_4_process_ras_controller_irq, /* dummy, always returns 0 */
+       .set = nbio_v7_4_set_ras_controller_irq_state, /* selects the vector on enable */
+};
+
+static const struct amdgpu_irq_src_funcs nbio_v7_4_ras_err_event_athub_irq_funcs = {
+       .process = nbio_v7_4_process_err_event_athub_irq, /* dummy, always returns 0 */
+       .set = nbio_v7_4_set_ras_err_event_athub_irq_state, /* selects the vector on enable */
+};
+
+static int nbio_v7_4_init_ras_controller_interrupt(struct amdgpu_device *adev)
+{
+       struct amdgpu_irq_src *irq = &adev->nbio.ras_controller_irq;
+
+       /* Hook up the callbacks of the ras controller interrupt source;
+        * it exposes a single interrupt type.
+        */
+       irq->funcs = &nbio_v7_4_ras_controller_irq_funcs;
+       irq->num_types = 1;
+
+       /* Register the source under the ras controller interrupt id of
+        * the BIF ih client. Whatever amdgpu_irq_add_id() returns is
+        * handed back to the caller unchanged.
+        */
+       return amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_BIF,
+                                NBIF_7_4__SRCID__RAS_CONTROLLER_INTERRUPT,
+                                irq);
+}
+
+static int nbio_v7_4_init_ras_err_event_athub_interrupt(struct amdgpu_device *adev)
+{
+       struct amdgpu_irq_src *irq = &adev->nbio.ras_err_event_athub_irq;
+
+       /* Hook up the callbacks of the ras err event athub interrupt
+        * source; it exposes a single interrupt type.
+        */
+       irq->funcs = &nbio_v7_4_ras_err_event_athub_irq_funcs;
+       irq->num_types = 1;
+
+       /* Register the source under the err event athub interrupt id of
+        * the BIF ih client. Whatever amdgpu_irq_add_id() returns is
+        * handed back to the caller unchanged.
+        */
+       return amdgpu_irq_add_id(adev,
+                                SOC15_IH_CLIENTID_BIF,
+                                NBIF_7_4__SRCID__ERREVENT_ATHUB_INTERRUPT,
+                                irq);
+}
+
 const struct amdgpu_nbio_funcs nbio_v7_4_funcs = {
        .get_hdp_flush_req_offset = nbio_v7_4_get_hdp_flush_req_offset,
        .get_hdp_flush_done_offset = nbio_v7_4_get_hdp_flush_done_offset,
@@ -368,4 +491,6 @@ const struct amdgpu_nbio_funcs nbio_v7_4_funcs = {
        .remap_hdp_registers = nbio_v7_4_remap_hdp_registers,
        .handle_ras_controller_intr_no_bifring = nbio_v7_4_handle_ras_controller_intr_no_bifring,
        .handle_ras_err_event_athub_intr_no_bifring = nbio_v7_4_handle_ras_err_event_athub_intr_no_bifring,
+       .init_ras_controller_interrupt = nbio_v7_4_init_ras_controller_interrupt,
+       .init_ras_err_event_athub_interrupt = nbio_v7_4_init_ras_err_event_athub_interrupt,
 };