drm/amdgpu: Add parsing SQ_EDC_INFO to SQ IH v3.
[linux-2.6-block.git] / drivers / gpu / drm / amd / amdgpu / gfx_v8_0.c
index 818874b13c99d1ee31d8dd7428ef27f0c0b1b267..4201f3dfaececd1b6a47a8f8b5691548ae18fedd 100644 (file)
@@ -704,6 +704,17 @@ static const u32 stoney_mgcg_cgcg_init[] =
        mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96940200,
 };
 
+
+/*
+ * Log descriptions for the SOURCE field read from SQ_EDC_INFO; the field
+ * value is used directly as the index into this table.
+ */
+static const char * const sq_edc_source_names[] = {
+       [0] = "SQ_EDC_INFO_SOURCE_INVALID: No EDC error has occurred",
+       [1] = "SQ_EDC_INFO_SOURCE_INST: EDC source is Instruction Fetch",
+       [2] = "SQ_EDC_INFO_SOURCE_SGPR: EDC source is SGPR or SQC data return",
+       [3] = "SQ_EDC_INFO_SOURCE_VGPR: EDC source is VGPR",
+       [4] = "SQ_EDC_INFO_SOURCE_LDS: EDC source is LDS",
+       [5] = "SQ_EDC_INFO_SOURCE_GDS: EDC source is GDS",
+       [6] = "SQ_EDC_INFO_SOURCE_TA: EDC source is TA",
+};
+
 static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev);
 static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev);
 static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev);
@@ -866,26 +877,32 @@ static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
        struct amdgpu_device *adev = ring->adev;
        struct amdgpu_ib ib;
        struct dma_fence *f = NULL;
-       uint32_t scratch;
-       uint32_t tmp = 0;
+
+       unsigned int index;
+       uint64_t gpu_addr;
+       uint32_t tmp;
        long r;
 
-       r = amdgpu_gfx_scratch_get(adev, &scratch);
+       r = amdgpu_device_wb_get(adev, &index);
        if (r) {
-               DRM_ERROR("amdgpu: failed to get scratch reg (%ld).\n", r);
+               dev_err(adev->dev, "(%ld) failed to allocate wb slot\n", r);
                return r;
        }
-       WREG32(scratch, 0xCAFEDEAD);
+
+       gpu_addr = adev->wb.gpu_addr + (index * 4);
+       adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
        memset(&ib, 0, sizeof(ib));
-       r = amdgpu_ib_get(adev, NULL, 256, &ib);
+       r = amdgpu_ib_get(adev, NULL, 16, &ib);
        if (r) {
                DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
                goto err1;
        }
-       ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
-       ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START));
-       ib.ptr[2] = 0xDEADBEEF;
-       ib.length_dw = 3;
+       ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
+       ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
+       ib.ptr[2] = lower_32_bits(gpu_addr);
+       ib.ptr[3] = upper_32_bits(gpu_addr);
+       ib.ptr[4] = 0xDEADBEEF;
+       ib.length_dw = 5;
 
        r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
        if (r)
@@ -900,20 +917,21 @@ static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
                DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
                goto err2;
        }
-       tmp = RREG32(scratch);
+
+       tmp = adev->wb.wb[index];
        if (tmp == 0xDEADBEEF) {
                DRM_DEBUG("ib test on ring %d succeeded\n", ring->idx);
                r = 0;
        } else {
-               DRM_ERROR("amdgpu: ib test failed (scratch(0x%04X)=0x%08X)\n",
-                         scratch, tmp);
+               DRM_ERROR("ib test on ring %d failed\n", ring->idx);
                r = -EINVAL;
        }
+
 err2:
        amdgpu_ib_free(adev, &ib, NULL);
        dma_fence_put(f);
 err1:
-       amdgpu_gfx_scratch_free(adev, scratch);
+       amdgpu_device_wb_free(adev, index);
        return r;
 }
 
@@ -1999,6 +2017,8 @@ static int gfx_v8_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
        return 0;
 }
 
+static void gfx_v8_0_sq_irq_work_func(struct work_struct *work);
+
 static int gfx_v8_0_sw_init(void *handle)
 {
        int i, j, k, r, ring_id;
@@ -2048,6 +2068,22 @@ static int gfx_v8_0_sw_init(void *handle)
        if (r)
                return r;
 
+       /* Add CP EDC/ECC irq  */
+       r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 197,
+                             &adev->gfx.cp_ecc_error_irq);
+       if (r)
+               return r;
+
+       /* SQ interrupts. */
+       r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 239,
+                             &adev->gfx.sq_irq);
+       if (r) {
+               DRM_ERROR("amdgpu_irq_add() for SQ failed: %d\n", r);
+               return r;
+       }
+
+       INIT_WORK(&adev->gfx.sq_work.work, gfx_v8_0_sq_irq_work_func);
+
        adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
 
        gfx_v8_0_scratch_init(adev);
@@ -5111,6 +5147,10 @@ static int gfx_v8_0_hw_fini(void *handle)
        amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
        amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
 
+       amdgpu_irq_put(adev, &adev->gfx.cp_ecc_error_irq, 0);
+
+       amdgpu_irq_put(adev, &adev->gfx.sq_irq, 0);
+
        /* disable KCQ to avoid CPC touch memory not valid anymore */
        for (i = 0; i < adev->gfx.num_compute_rings; i++)
                gfx_v8_0_kcq_disable(&adev->gfx.kiq.ring, &adev->gfx.compute_ring[i]);
@@ -5542,6 +5582,20 @@ static int gfx_v8_0_late_init(void *handle)
        if (r)
                return r;
 
+       r = amdgpu_irq_get(adev, &adev->gfx.cp_ecc_error_irq, 0);
+       if (r) {
+               DRM_ERROR("amdgpu_irq_get() failed to get IRQ for EDC, r: %d.\n", r);
+               return r;
+       }
+
+       r = amdgpu_irq_get(adev, &adev->gfx.sq_irq, 0);
+       if (r) {
+               DRM_ERROR(
+                       "amdgpu_irq_get() failed to get IRQ for SQ, r: %d.\n",
+                       r);
+               return r;
+       }
+
        amdgpu_device_ip_set_powergating_state(adev,
                                               AMD_IP_BLOCK_TYPE_GFX,
                                               AMD_PG_STATE_GATE);
@@ -6787,6 +6841,77 @@ static int gfx_v8_0_set_eop_interrupt_state(struct amdgpu_device *adev,
        return 0;
 }
 
+/*
+ * gfx_v8_0_set_cp_ecc_int_state - switch the CP ECC error interrupt on or off
+ *
+ * Writes the CP_ECC_ERROR_INT_ENABLE field of each CP interrupt control
+ * register listed below: 1 for AMDGPU_IRQ_STATE_ENABLE, 0 for
+ * AMDGPU_IRQ_STATE_DISABLE.
+ *
+ * Returns 0 on success, or -EINVAL for any other @state (no register is
+ * written in that case).
+ */
+static int gfx_v8_0_set_cp_ecc_int_state(struct amdgpu_device *adev,
+                                        struct amdgpu_irq_src *source,
+                                        unsigned int type,
+                                        enum amdgpu_interrupt_state state)
+{
+       int ecc_int_en;
+
+       if (state == AMDGPU_IRQ_STATE_DISABLE)
+               ecc_int_en = 0;
+       else if (state == AMDGPU_IRQ_STATE_ENABLE)
+               ecc_int_en = 1;
+       else
+               return -EINVAL;
+
+       /* Global and per-ring CP interrupt controls. */
+       WREG32_FIELD(CP_INT_CNTL, CP_ECC_ERROR_INT_ENABLE, ecc_int_en);
+       WREG32_FIELD(CP_INT_CNTL_RING0, CP_ECC_ERROR_INT_ENABLE, ecc_int_en);
+       WREG32_FIELD(CP_INT_CNTL_RING1, CP_ECC_ERROR_INT_ENABLE, ecc_int_en);
+       WREG32_FIELD(CP_INT_CNTL_RING2, CP_ECC_ERROR_INT_ENABLE, ecc_int_en);
+       WREG32_FIELD(CPC_INT_CNTL, CP_ECC_ERROR_INT_ENABLE, ecc_int_en);
+
+       /* ME1 pipes 0-3. */
+       WREG32_FIELD(CP_ME1_PIPE0_INT_CNTL, CP_ECC_ERROR_INT_ENABLE, ecc_int_en);
+       WREG32_FIELD(CP_ME1_PIPE1_INT_CNTL, CP_ECC_ERROR_INT_ENABLE, ecc_int_en);
+       WREG32_FIELD(CP_ME1_PIPE2_INT_CNTL, CP_ECC_ERROR_INT_ENABLE, ecc_int_en);
+       WREG32_FIELD(CP_ME1_PIPE3_INT_CNTL, CP_ECC_ERROR_INT_ENABLE, ecc_int_en);
+
+       /* ME2 pipes 0-3. */
+       WREG32_FIELD(CP_ME2_PIPE0_INT_CNTL, CP_ECC_ERROR_INT_ENABLE, ecc_int_en);
+       WREG32_FIELD(CP_ME2_PIPE1_INT_CNTL, CP_ECC_ERROR_INT_ENABLE, ecc_int_en);
+       WREG32_FIELD(CP_ME2_PIPE2_INT_CNTL, CP_ECC_ERROR_INT_ENABLE, ecc_int_en);
+       WREG32_FIELD(CP_ME2_PIPE3_INT_CNTL, CP_ECC_ERROR_INT_ENABLE, ecc_int_en);
+
+       return 0;
+}
+
+/*
+ * gfx_v8_0_set_sq_int_state - switch SQ interrupt delivery on or off
+ *
+ * The control used here is the STALL field of SQ_INTERRUPT_MSG_CTRL, so the
+ * value written is the inverse of the requested state: 0 for
+ * AMDGPU_IRQ_STATE_ENABLE and 1 for AMDGPU_IRQ_STATE_DISABLE.
+ *
+ * Returns 0 on success, or -EINVAL for any other @state (the register is
+ * left untouched in that case).
+ */
+static int gfx_v8_0_set_sq_int_state(struct amdgpu_device *adev,
+                                    struct amdgpu_irq_src *source,
+                                    unsigned int type,
+                                    enum amdgpu_interrupt_state state)
+{
+       int stall;
+
+       if (state == AMDGPU_IRQ_STATE_ENABLE)
+               stall = 0;
+       else if (state == AMDGPU_IRQ_STATE_DISABLE)
+               stall = 1;
+       else
+               return -EINVAL;
+
+       WREG32_FIELD(SQ_INTERRUPT_MSG_CTRL, STALL, stall);
+
+       return 0;
+}
+
 static int gfx_v8_0_eop_irq(struct amdgpu_device *adev,
                            struct amdgpu_irq_src *source,
                            struct amdgpu_iv_entry *entry)
@@ -6837,6 +6962,114 @@ static int gfx_v8_0_priv_inst_irq(struct amdgpu_device *adev,
        return 0;
 }
 
+/*
+ * gfx_v8_0_cp_ecc_error_irq - handler for the CP EDC/ECC error interrupt
+ *
+ * Only logs that the error was seen; @source and @entry are not inspected.
+ * Always returns 0.
+ */
+static int gfx_v8_0_cp_ecc_error_irq(struct amdgpu_device *adev,
+                                    struct amdgpu_irq_src *source,
+                                    struct amdgpu_iv_entry *entry)
+{
+       /* Terminate the message so it is emitted as one complete log line. */
+       DRM_ERROR("CP EDC/ECC error detected.\n");
+       return 0;
+}
+
+/*
+ * gfx_v8_0_parse_sq_irq - decode and log one SQ interrupt payload
+ * @adev: device the interrupt belongs to
+ * @ih_data: SQ interrupt word to decode
+ *
+ * The ENCODING field selects how @ih_data is interpreted: 0 is logged as a
+ * general purpose interrupt, 1 as an instruction interrupt and 2 as an
+ * EDC/ECC error; any other value is reported as an invalid encoding.
+ *
+ * For encodings 1 and 2 the SOURCE field of SQ_EDC_INFO is reported as well,
+ * but it is only read when in_task() is true, since the read is done under
+ * grbm_idx_mutex. Otherwise the source is logged as "unavailable".
+ */
+static void gfx_v8_0_parse_sq_irq(struct amdgpu_device *adev, unsigned ih_data)
+{
+       u32 enc, se_id, sh_id, cu_id;
+       u32 edc_source;
+       const char *type;
+       const char *edc_source_name = "unavailable";
+
+       enc = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_CMN, ENCODING);
+       se_id = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_CMN, SE_ID);
+
+       switch (enc) {
+       case 0:
+               DRM_INFO("SQ general purpose intr detected:"
+                               "se_id %d, immed_overflow %d, host_reg_overflow %d,"
+                               "host_cmd_overflow %d, cmd_timestamp %d,"
+                               "reg_timestamp %d, thread_trace_buff_full %d,"
+                               "wlt %d, thread_trace %d.\n",
+                               se_id,
+                               REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, IMMED_OVERFLOW),
+                               REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, HOST_REG_OVERFLOW),
+                               REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, HOST_CMD_OVERFLOW),
+                               REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, CMD_TIMESTAMP),
+                               REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, REG_TIMESTAMP),
+                               REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, THREAD_TRACE_BUF_FULL),
+                               REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, WLT),
+                               REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, THREAD_TRACE)
+                               );
+               break;
+       case 1:
+       case 2:
+               cu_id = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, CU_ID);
+               sh_id = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, SH_ID);
+
+               /*
+                * This function can be called either directly from the ISR
+                * or from the BH; only in the latter case can the
+                * SQ_EDC_INFO instance be accessed.
+                */
+               if (in_task()) {
+                       mutex_lock(&adev->grbm_idx_mutex);
+                       gfx_v8_0_select_se_sh(adev, se_id, sh_id, cu_id);
+
+                       edc_source = REG_GET_FIELD(RREG32(mmSQ_EDC_INFO), SQ_EDC_INFO, SOURCE);
+
+                       /* Restore the selection used before the read. */
+                       gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
+                       mutex_unlock(&adev->grbm_idx_mutex);
+
+                       /*
+                        * The value comes straight from a register, so do
+                        * not trust it to be a valid index into the name
+                        * table.
+                        */
+                       if (edc_source < ARRAY_SIZE(sq_edc_source_names))
+                               edc_source_name = sq_edc_source_names[edc_source];
+                       else
+                               edc_source_name = "unknown";
+               }
+
+               type = (enc == 1) ? "instruction intr" : "EDC/ECC error";
+
+               DRM_INFO(
+                       "SQ %s detected: "
+                               "se_id %d, sh_id %d, cu_id %d, simd_id %d, wave_id %d, vm_id %d "
+                               "trap %s, sq_ed_info.source %s.\n",
+                               type, se_id, sh_id, cu_id,
+                               REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, SIMD_ID),
+                               REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, WAVE_ID),
+                               REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, VM_ID),
+                               REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, PRIV) ? "true" : "false",
+                               edc_source_name
+                       );
+               break;
+       default:
+               DRM_ERROR("SQ invalid encoding type.\n");
+               break;
+       }
+}
+
+/*
+ * gfx_v8_0_sq_irq_work_func - work item callback for SQ interrupts
+ * @work: the work_struct embedded in the device's gfx.sq_work
+ *
+ * Recovers the owning device and the saved interrupt word from @work and
+ * passes them to gfx_v8_0_parse_sq_irq().
+ */
+static void gfx_v8_0_sq_irq_work_func(struct work_struct *work)
+{
+       struct amdgpu_device *adev;
+       struct sq_work *pending;
+
+       pending = container_of(work, struct sq_work, work);
+       adev = container_of(work, struct amdgpu_device, gfx.sq_work.work);
+
+       gfx_v8_0_parse_sq_irq(adev, pending->ih_data);
+}
+
+/*
+ * gfx_v8_0_sq_irq - handler for SQ interrupts
+ * @adev: device that raised the interrupt
+ * @source: interrupt source (not inspected here)
+ * @entry: IH entry; src_data[0] carries the SQ interrupt word
+ *
+ * Always returns 0.
+ */
+static int gfx_v8_0_sq_irq(struct amdgpu_device *adev,
+                          struct amdgpu_irq_src *source,
+                          struct amdgpu_iv_entry *entry)
+{
+       struct work_struct *work = &adev->gfx.sq_work.work;
+       unsigned ih_data = entry->src_data[0];
+
+       /*
+        * Preferred path: hand the word to the work item so that
+        * SQ_EDC_INFO can be accessed from the BH.
+        */
+       if (!work_pending(work)) {
+               adev->gfx.sq_work.ih_data = ih_data;
+               schedule_work(work);
+               return 0;
+       }
+
+       /*
+        * A previous submission hasn't finished yet: just print whatever
+        * info is possible directly from the ISR.
+        */
+       gfx_v8_0_parse_sq_irq(adev, ih_data);
+
+       return 0;
+}
+
 static int gfx_v8_0_kiq_set_interrupt_state(struct amdgpu_device *adev,
                                            struct amdgpu_irq_src *src,
                                            unsigned int type,
@@ -7037,6 +7270,16 @@ static const struct amdgpu_irq_src_funcs gfx_v8_0_kiq_irq_funcs = {
        .process = gfx_v8_0_kiq_irq,
 };
 
+/* Callbacks for the CP EDC/ECC error interrupt source. */
+static const struct amdgpu_irq_src_funcs gfx_v8_0_cp_ecc_error_irq_funcs = {
+       .process = gfx_v8_0_cp_ecc_error_irq,
+       .set = gfx_v8_0_set_cp_ecc_int_state,
+};
+
+/* Callbacks for the SQ interrupt source. */
+static const struct amdgpu_irq_src_funcs gfx_v8_0_sq_irq_funcs = {
+       .process = gfx_v8_0_sq_irq,
+       .set = gfx_v8_0_set_sq_int_state,
+};
+
 static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev)
 {
        adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
@@ -7050,6 +7293,12 @@ static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev)
 
        adev->gfx.kiq.irq.num_types = AMDGPU_CP_KIQ_IRQ_LAST;
        adev->gfx.kiq.irq.funcs = &gfx_v8_0_kiq_irq_funcs;
+
+       adev->gfx.cp_ecc_error_irq.num_types = 1;
+       adev->gfx.cp_ecc_error_irq.funcs = &gfx_v8_0_cp_ecc_error_irq_funcs;
+
+       adev->gfx.sq_irq.num_types = 1;
+       adev->gfx.sq_irq.funcs = &gfx_v8_0_sq_irq_funcs;
 }
 
 static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev)