drm/msm: use componentised device support
[linux-2.6-block.git] / drivers / gpu / drm / msm / adreno / a3xx_gpu.c
index 035bd13dc8bdc3ab039bc2f1bab291cbf4c9bf17..f20fbde5dc490595f57d171424c1c25f7ff5dd5b 100644 (file)
  * this program.  If not, see <http://www.gnu.org/licenses/>.
  */
 
+#ifdef CONFIG_MSM_OCMEM
+#  include <mach/ocmem.h>
+#endif
+
 #include "a3xx_gpu.h"
 
 #define A3XX_INT0_MASK \
         A3XX_INT0_CP_AHB_ERROR_HALT |     \
         A3XX_INT0_UCHE_OOB_ACCESS)
 
-static struct platform_device *a3xx_pdev;
+
+static bool hang_debug = false;        /* tested by a3xx_recover() before it resets the GPU */
+MODULE_PARM_DESC(hang_debug, "Dump registers when hang is detected (can be slow!)");
+module_param_named(hang_debug, hang_debug, bool, 0600);
+static void a3xx_dump(struct msm_gpu *gpu);    /* forward declaration: a3xx_recover() calls it ahead of its definition */
 
 static void a3xx_me_init(struct msm_gpu *gpu)
 {
@@ -63,6 +71,7 @@ static void a3xx_me_init(struct msm_gpu *gpu)
 static int a3xx_hw_init(struct msm_gpu *gpu)
 {
        struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
+       struct a3xx_gpu *a3xx_gpu = to_a3xx_gpu(adreno_gpu);
        uint32_t *ptr, len;
        int i, ret;
 
@@ -105,6 +114,21 @@ static int a3xx_hw_init(struct msm_gpu *gpu)
                gpu_write(gpu, REG_A3XX_VBIF_ABIT_SORT, 0x000000ff);
                gpu_write(gpu, REG_A3XX_VBIF_ABIT_SORT_CONF, 0x000000a4);
 
+       } else if (adreno_is_a330v2(adreno_gpu)) {
+               /*
+                * Most of the VBIF registers on 8974v2 have the correct
+                * values at power on, so we won't modify those if we don't
+                * need to
+                */
+               /* Enable 1k sort: */
+               gpu_write(gpu, REG_A3XX_VBIF_ABIT_SORT, 0x0001003f);
+               gpu_write(gpu, REG_A3XX_VBIF_ABIT_SORT_CONF, 0x000000a4);
+               /* Enable WR-REQ: */
+               gpu_write(gpu, REG_A3XX_VBIF_GATE_OFF_WRREQ_EN, 0x00003f);
+               gpu_write(gpu, REG_A3XX_VBIF_DDR_OUT_MAX_BURST, 0x0000303);
+               /* Set up VBIF_ROUND_ROBIN_QOS_ARB: */
+               gpu_write(gpu, REG_A3XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x0003);
+
        } else if (adreno_is_a330(adreno_gpu)) {
                /* Set up 16 deep read/write request queues: */
                gpu_write(gpu, REG_A3XX_VBIF_IN_RD_LIM_CONF0, 0x18181818);
@@ -121,10 +145,10 @@ static int a3xx_hw_init(struct msm_gpu *gpu)
                /* Set up VBIF_ROUND_ROBIN_QOS_ARB: */
                gpu_write(gpu, REG_A3XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x0001);
                /* Set up AOOO: */
-               gpu_write(gpu, REG_A3XX_VBIF_OUT_AXI_AOOO_EN, 0x0000ffff);
-               gpu_write(gpu, REG_A3XX_VBIF_OUT_AXI_AOOO, 0xffffffff);
+               gpu_write(gpu, REG_A3XX_VBIF_OUT_AXI_AOOO_EN, 0x0000003f);
+               gpu_write(gpu, REG_A3XX_VBIF_OUT_AXI_AOOO, 0x003f003f);
                /* Enable 1K sort: */
-               gpu_write(gpu, REG_A3XX_VBIF_ABIT_SORT, 0x0001ffff);
+               gpu_write(gpu, REG_A3XX_VBIF_ABIT_SORT, 0x0001003f);
                gpu_write(gpu, REG_A3XX_VBIF_ABIT_SORT_CONF, 0x000000a4);
                /* Disable VBIF clock gating. This is to enable AXI running
                 * higher frequency than GPU:
@@ -162,14 +186,23 @@ static int a3xx_hw_init(struct msm_gpu *gpu)
        gpu_write(gpu, REG_A3XX_UCHE_CACHE_MODE_CONTROL_REG, 0x00000001);
 
        /* Enable Clock gating: */
-       gpu_write(gpu, REG_A3XX_RBBM_CLOCK_CTL, 0xbfffffff);
-
-       /* Set the OCMEM base address for A330 */
-//TODO:
-//     if (adreno_is_a330(adreno_gpu)) {
-//             gpu_write(gpu, REG_A3XX_RB_GMEM_BASE_ADDR,
-//                     (unsigned int)(a3xx_gpu->ocmem_base >> 14));
-//     }
+       if (adreno_is_a320(adreno_gpu))
+               gpu_write(gpu, REG_A3XX_RBBM_CLOCK_CTL, 0xbfffffff);
+       else if (adreno_is_a330v2(adreno_gpu))
+               gpu_write(gpu, REG_A3XX_RBBM_CLOCK_CTL, 0xaaaaaaaa);
+       else if (adreno_is_a330(adreno_gpu))
+               gpu_write(gpu, REG_A3XX_RBBM_CLOCK_CTL, 0xbffcffff);
+
+       if (adreno_is_a330v2(adreno_gpu))
+               gpu_write(gpu, REG_A3XX_RBBM_GPR0_CTL, 0x05515455);
+       else if (adreno_is_a330(adreno_gpu))
+               gpu_write(gpu, REG_A3XX_RBBM_GPR0_CTL, 0x00000000);
+
+       /* Set the OCMEM base address for A330, etc */
+       if (a3xx_gpu->ocmem_hdl) {
+               gpu_write(gpu, REG_A3XX_RB_GMEM_BASE_ADDR,
+                       (unsigned int)(a3xx_gpu->ocmem_base >> 14));
+       }
 
        /* Turn on performance counters: */
        gpu_write(gpu, REG_A3XX_RBBM_PERFCTR_CTL, 0x01);
@@ -219,7 +252,7 @@ static int a3xx_hw_init(struct msm_gpu *gpu)
        /* Load PM4: */
        ptr = (uint32_t *)(adreno_gpu->pm4->data);
        len = adreno_gpu->pm4->size / 4;
-       DBG("loading PM4 ucode version: %u", ptr[0]);
+       DBG("loading PM4 ucode version: %x", ptr[1]);
 
        gpu_write(gpu, REG_AXXX_CP_DEBUG,
                        AXXX_CP_DEBUG_DYNAMIC_CLK_DISABLE |
@@ -231,19 +264,26 @@ static int a3xx_hw_init(struct msm_gpu *gpu)
        /* Load PFP: */
        ptr = (uint32_t *)(adreno_gpu->pfp->data);
        len = adreno_gpu->pfp->size / 4;
-       DBG("loading PFP ucode version: %u", ptr[0]);
+       DBG("loading PFP ucode version: %x", ptr[5]);
 
        gpu_write(gpu, REG_A3XX_CP_PFP_UCODE_ADDR, 0);
        for (i = 1; i < len; i++)
                gpu_write(gpu, REG_A3XX_CP_PFP_UCODE_DATA, ptr[i]);
 
        /* CP ROQ queue sizes (bytes) - RB:16, ST:16, IB1:32, IB2:64 */
-       if (adreno_is_a305(adreno_gpu) || adreno_is_a320(adreno_gpu))
+       if (adreno_is_a305(adreno_gpu) || adreno_is_a320(adreno_gpu)) {
                gpu_write(gpu, REG_AXXX_CP_QUEUE_THRESHOLDS,
                                AXXX_CP_QUEUE_THRESHOLDS_CSQ_IB1_START(2) |
                                AXXX_CP_QUEUE_THRESHOLDS_CSQ_IB2_START(6) |
                                AXXX_CP_QUEUE_THRESHOLDS_CSQ_ST_START(14));
-
+       } else if (adreno_is_a330(adreno_gpu)) {
+               /* NOTE: this (value taken from downstream android driver)
+                * includes some bits outside of the known bitfields.  But
+                * A330 has this "MERCIU queue" thing too, which might
+                * explain a new bitfield or reshuffling:
+                */
+               gpu_write(gpu, REG_AXXX_CP_QUEUE_THRESHOLDS, 0x003e2008);
+       }
 
        /* clear ME_HALT to start micro engine */
        gpu_write(gpu, REG_AXXX_CP_ME_CNTL, 0);
@@ -253,6 +293,17 @@ static int a3xx_hw_init(struct msm_gpu *gpu)
        return 0;
 }
 
+static void a3xx_recover(struct msm_gpu *gpu)
+{
+       /* dump registers before resetting gpu, if enabled (hang_debug module param): */
+       if (hang_debug)
+               a3xx_dump(gpu);
+       gpu_write(gpu, REG_A3XX_RBBM_SW_RESET_CMD, 1);  /* write 1, read back, then write 0 to RBBM_SW_RESET_CMD */
+       gpu_read(gpu, REG_A3XX_RBBM_SW_RESET_CMD);      /* NOTE(review): value is discarded; presumably only flushes the write above -- confirm */
+       gpu_write(gpu, REG_A3XX_RBBM_SW_RESET_CMD, 0);
+       adreno_recover(gpu);    /* continue with adreno_recover() once the reset sequence has been issued */
+}
+
 static void a3xx_destroy(struct msm_gpu *gpu)
 {
        struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
@@ -261,27 +312,24 @@ static void a3xx_destroy(struct msm_gpu *gpu)
        DBG("%s", gpu->name);
 
        adreno_gpu_cleanup(adreno_gpu);
-       put_device(&a3xx_gpu->pdev->dev);
+
+#ifdef CONFIG_MSM_OCMEM
+       if (a3xx_gpu->ocmem_base)
+               ocmem_free(OCMEM_GRAPHICS, a3xx_gpu->ocmem_hdl);
+#endif
+
        kfree(a3xx_gpu);
 }
 
 static void a3xx_idle(struct msm_gpu *gpu)
 {
-       unsigned long t;
-
        /* wait for ringbuffer to drain: */
        adreno_idle(gpu);
 
-       t = jiffies + ADRENO_IDLE_TIMEOUT;
-
        /* then wait for GPU to finish: */
-       do {
-               uint32_t rbbm_status = gpu_read(gpu, REG_A3XX_RBBM_STATUS);
-               if (!(rbbm_status & A3XX_RBBM_STATUS_GPU_BUSY))
-                       return;
-       } while(time_before(jiffies, t));
-
-       DRM_ERROR("timeout waiting for %s to idle!\n", gpu->name);
+       if (spin_until(!(gpu_read(gpu, REG_A3XX_RBBM_STATUS) &
+                       A3XX_RBBM_STATUS_GPU_BUSY)))
+               DRM_ERROR("%s: timeout waiting for GPU to idle!\n", gpu->name);  /* a nonzero spin_until() result is reported as a timeout; only logged, no recovery here */
 
        /* TODO maybe we need to reset GPU here to recover from hang? */
 }
@@ -302,7 +350,6 @@ static irqreturn_t a3xx_irq(struct msm_gpu *gpu)
        return IRQ_HANDLED;
 }
 
-#ifdef CONFIG_DEBUG_FS
 static const unsigned int a3xx_registers[] = {
        0x0000, 0x0002, 0x0010, 0x0012, 0x0018, 0x0018, 0x0020, 0x0027,
        0x0029, 0x002b, 0x002e, 0x0033, 0x0040, 0x0042, 0x0050, 0x005c,
@@ -342,11 +389,18 @@ static const unsigned int a3xx_registers[] = {
        0x303c, 0x303c, 0x305e, 0x305f,
 };
 
+#ifdef CONFIG_DEBUG_FS
 static void a3xx_show(struct msm_gpu *gpu, struct seq_file *m)
 {
+       struct drm_device *dev = gpu->dev;
        int i;
 
        adreno_show(gpu, m);
+
+       mutex_lock(&dev->struct_mutex);
+
+       gpu->funcs->pm_resume(gpu);
+
        seq_printf(m, "status:   %08x\n",
                        gpu_read(gpu, REG_A3XX_RBBM_STATUS));
 
@@ -362,16 +416,43 @@ static void a3xx_show(struct msm_gpu *gpu, struct seq_file *m)
                        seq_printf(m, "IO:R %08x %08x\n", addr<<2, val);
                }
        }
+
+       gpu->funcs->pm_suspend(gpu);
+
+       mutex_unlock(&dev->struct_mutex);
 }
 #endif
 
+/* printk() twin of a3xx_show(), called from a3xx_recover() when hang_debug is set; would be nice to not have to duplicate the _show() stuff with printk(): */
+static void a3xx_dump(struct msm_gpu *gpu)
+{
+       int i;
+
+       adreno_dump(gpu);
+       printk("status:   %08x\n",
+                       gpu_read(gpu, REG_A3XX_RBBM_STATUS));
+
+       /* dump these out in a form that can be parsed by demsm: */
+       printk("IO:region %s 00000000 00020000\n", gpu->name);
+       for (i = 0; i < ARRAY_SIZE(a3xx_registers); i += 2) {  /* table is consumed as (start, end) pairs */
+               uint32_t start = a3xx_registers[i];
+               uint32_t end   = a3xx_registers[i+1];
+               uint32_t addr;
+
+               for (addr = start; addr <= end; addr++) {       /* end is inclusive */
+                       uint32_t val = gpu_read(gpu, addr);
+                       printk("IO:R %08x %08x\n", addr<<2, val);       /* addr<<2: presumably register index -> byte offset -- TODO confirm */
+               }
+       }
+}
+
 static const struct adreno_gpu_funcs funcs = {
        .base = {
                .get_param = adreno_get_param,
                .hw_init = a3xx_hw_init,
                .pm_suspend = msm_gpu_pm_suspend,
                .pm_resume = msm_gpu_pm_resume,
-               .recover = adreno_recover,
+               .recover = a3xx_recover,
                .last_fence = adreno_last_fence,
                .submit = adreno_submit,
                .flush = adreno_flush,
@@ -387,8 +468,10 @@ static const struct adreno_gpu_funcs funcs = {
 struct msm_gpu *a3xx_gpu_init(struct drm_device *dev)
 {
        struct a3xx_gpu *a3xx_gpu = NULL;
+       struct adreno_gpu *adreno_gpu;
        struct msm_gpu *gpu;
-       struct platform_device *pdev = a3xx_pdev;
+       struct msm_drm_private *priv = dev->dev_private;
+       struct platform_device *pdev = priv->gpu_pdev;
        struct adreno_platform_config *config;
        int ret;
 
@@ -406,24 +489,54 @@ struct msm_gpu *a3xx_gpu_init(struct drm_device *dev)
                goto fail;
        }
 
-       gpu = &a3xx_gpu->base.base;
+       adreno_gpu = &a3xx_gpu->base;
+       gpu = &adreno_gpu->base;
 
-       get_device(&pdev->dev);
        a3xx_gpu->pdev = pdev;
 
        gpu->fast_rate = config->fast_rate;
        gpu->slow_rate = config->slow_rate;
        gpu->bus_freq  = config->bus_freq;
+#ifdef CONFIG_MSM_BUS_SCALING
+       gpu->bus_scale_table = config->bus_scale_table;
+#endif
 
        DBG("fast_rate=%u, slow_rate=%u, bus_freq=%u",
                        gpu->fast_rate, gpu->slow_rate, gpu->bus_freq);
 
-       ret = adreno_gpu_init(dev, pdev, &a3xx_gpu->base,
-                       &funcs, config->rev);
+       ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs, config->rev);
        if (ret)
                goto fail;
 
-       return &a3xx_gpu->base.base;
+       /* if needed, allocate gmem: */
+       if (adreno_is_a330(adreno_gpu)) {
+#ifdef CONFIG_MSM_OCMEM
+               /* TODO this is different/missing upstream: */
+               struct ocmem_buf *ocmem_hdl =
+                               ocmem_allocate(OCMEM_GRAPHICS, adreno_gpu->gmem);
+
+               a3xx_gpu->ocmem_hdl = ocmem_hdl;
+               a3xx_gpu->ocmem_base = ocmem_hdl->addr;
+               adreno_gpu->gmem = ocmem_hdl->len;
+               DBG("using %dK of OCMEM at 0x%08x", adreno_gpu->gmem / 1024,
+                               a3xx_gpu->ocmem_base);
+#endif
+       }
+
+       if (!gpu->mmu) {
+               /* TODO we think it is possible to configure the GPU to
+                * restrict access to VRAM carveout.  But the required
+                * registers are unknown.  For now just bail out and
+                * limp along with just modesetting.  If it turns out
+                * to not be possible to restrict access, then we must
+                * implement a cmdstream validator.
+                */
+               dev_err(dev->dev, "No memory protection without IOMMU\n");
+               ret = -ENXIO;
+               goto fail;
+       }
+
+       return gpu;
 
 fail:
        if (a3xx_gpu)
@@ -436,19 +549,66 @@ fail:
  * The a3xx device:
  */
 
-static int a3xx_probe(struct platform_device *pdev)
+#if defined(CONFIG_MSM_BUS_SCALING) && !defined(CONFIG_OF)
+#  include <mach/kgsl.h>
+#endif
+
+static void set_gpu_pdev(struct drm_device *dev,
+               struct platform_device *pdev)
+{
+       struct msm_drm_private *priv = dev->dev_private;
+       priv->gpu_pdev = pdev;  /* read back by a3xx_gpu_init(); a3xx_unbind() stores NULL here */
+}
+
+static int a3xx_bind(struct device *dev, struct device *master, void *data)
 {
        static struct adreno_platform_config config = {};
 #ifdef CONFIG_OF
-       /* TODO */
+       struct device_node *child, *node = dev->of_node;
+       u32 val;
+       int ret;
+
+       ret = of_property_read_u32(node, "qcom,chipid", &val);
+       if (ret) {
+               dev_err(dev, "could not find chipid: %d\n", ret);
+               return ret;
+       }
+
+       config.rev = ADRENO_REV((val >> 24) & 0xff,
+                       (val >> 16) & 0xff, (val >> 8) & 0xff, val & 0xff);
+
+       /* find clock rates: */
+       config.fast_rate = 0;
+       config.slow_rate = ~0;
+       for_each_child_of_node(node, child) {
+               if (of_device_is_compatible(child, "qcom,gpu-pwrlevels")) {
+                       struct device_node *pwrlvl;
+                       for_each_child_of_node(child, pwrlvl) {
+                               ret = of_property_read_u32(pwrlvl, "qcom,gpu-freq", &val);
+                               if (ret) {
+                                       dev_err(dev, "could not find gpu-freq: %d\n", ret);
+                                       return ret;
+                               }
+                               config.fast_rate = max(config.fast_rate, val);
+                               config.slow_rate = min(config.slow_rate, val);
+                       }
+               }
+       }
+
+       if (!config.fast_rate) {
+               dev_err(dev, "could not find clk rates\n");
+               return -ENXIO;
+       }
+
 #else
+       struct kgsl_device_platform_data *pdata = dev->platform_data;
        uint32_t version = socinfo_get_version();
        if (cpu_is_apq8064ab()) {
                config.fast_rate = 450000000;
                config.slow_rate = 27000000;
                config.bus_freq  = 4;
                config.rev = ADRENO_REV(3, 2, 1, 0);
-       } else if (cpu_is_apq8064() || cpu_is_msm8960ab()) {
+       } else if (cpu_is_apq8064()) {
                config.fast_rate = 400000000;
                config.slow_rate = 27000000;
                config.bus_freq  = 4;
@@ -461,6 +621,16 @@ static int a3xx_probe(struct platform_device *pdev)
                else
                        config.rev = ADRENO_REV(3, 2, 0, 0);
 
+       } else if (cpu_is_msm8960ab()) {
+               config.fast_rate = 400000000;
+               config.slow_rate = 320000000;
+               config.bus_freq  = 4;
+
+               if (SOCINFO_VERSION_MINOR(version) == 0)
+                       config.rev = ADRENO_REV(3, 2, 1, 0);
+               else
+                       config.rev = ADRENO_REV(3, 2, 1, 1);
+
        } else if (cpu_is_msm8930()) {
                config.fast_rate = 400000000;
                config.slow_rate = 27000000;
@@ -473,22 +643,49 @@ static int a3xx_probe(struct platform_device *pdev)
                        config.rev = ADRENO_REV(3, 0, 5, 0);
 
        }
+#  ifdef CONFIG_MSM_BUS_SCALING
+       config.bus_scale_table = pdata->bus_scale_table;
+#  endif
 #endif
-       pdev->dev.platform_data = &config;
-       a3xx_pdev = pdev;
+       dev->platform_data = &config;
+       set_gpu_pdev(dev_get_drvdata(master), to_platform_device(dev));
        return 0;
 }
 
+static void a3xx_unbind(struct device *dev, struct device *master,
+               void *data)
+{
+       set_gpu_pdev(dev_get_drvdata(master), NULL);    /* clear the pdev that a3xx_bind() recorded on the master's drm_device */
+}
+
+static const struct component_ops a3xx_ops = {  /* passed to component_add()/component_del() in a3xx_probe()/a3xx_remove() */
+               .bind   = a3xx_bind,
+               .unbind = a3xx_unbind,
+};
+
+static int a3xx_probe(struct platform_device *pdev)
+{
+       return component_add(&pdev->dev, &a3xx_ops);    /* only registers the component; config parsing lives in a3xx_bind() */
+}
+
 static int a3xx_remove(struct platform_device *pdev)
 {
-       a3xx_pdev = NULL;
+       component_del(&pdev->dev, &a3xx_ops);   /* counterpart of the component_add() in a3xx_probe() */
        return 0;
 }
 
+static const struct of_device_id dt_match[] = { /* hooked up as a3xx_driver.driver.of_match_table */
+       { .compatible = "qcom,kgsl-3d0" },
+       {}      /* empty terminating entry */
+};
+
 static struct platform_driver a3xx_driver = {
        .probe = a3xx_probe,
        .remove = a3xx_remove,
-       .driver.name = "kgsl-3d0",
+       .driver = {
+               .name = "kgsl-3d0",
+               .of_match_table = dt_match,     /* adds device-tree matching on "qcom,kgsl-3d0" */
+       },
 };
 
 void __init a3xx_register(void)