[linux-2.6-block.git] / drivers / gpu / drm / msm / msm_gpu.h

/*
 * Copyright (C) 2013 Red Hat
 * Author: Rob Clark <robdclark@gmail.com>
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 as published by
 * the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program.  If not, see <http://www.gnu.org/licenses/>.
 */

#ifndef __MSM_GPU_H__
#define __MSM_GPU_H__

#include <linux/clk.h>
#include <linux/regulator/consumer.h>

#include "msm_drv.h"
#include "msm_fence.h"
#include "msm_ringbuffer.h"

struct msm_gem_submit;
struct msm_gpu_perfcntr;

/*
 * Configuration bundle passed to msm_gpu_init() (see its prototype in this
 * header).  How each field is consumed is decided by msm_gpu_init(), whose
 * definition is not part of this header.
 */
struct msm_gpu_config {
	const char *ioname;	/* NOTE(review): presumably names the register (MMIO) resource -- confirm */
	const char *irqname;	/* NOTE(review): presumably names the interrupt resource -- confirm */
	uint64_t va_start;	/* NOTE(review): presumably start of the GPU virtual address range -- confirm */
	uint64_t va_end;	/* NOTE(review): presumably end of that range; inclusive/exclusive unknown here */
	unsigned int ringsz;	/* NOTE(review): presumably the ringbuffer size; units not visible here */
};

/* So far, with hardware that I've seen to date, we can have:
 *  + zero, one, or two z180 2d cores
 *  + a3xx or a2xx 3d core, which share a common CP (the firmware
 *    for the CP seems to implement some different PM4 packet types
 *    but the basics of cmdstream submission are the same)
 *
 * Which means that the eventual complete "class" hierarchy, once
 * support for all past and present hw is in place, becomes:
 *  + msm_gpu
 *    + adreno_gpu
 *      + a3xx_gpu
 *      + a2xx_gpu
 *    + z180_gpu
 */
/*
 * Per-GPU operations table.  struct msm_gpu keeps a const pointer to one of
 * these in its 'funcs' member, and every hook receives the struct msm_gpu it
 * operates on as its first argument.
 *
 * Only last_fence() is invoked from this header (by msm_gpu_active()); the
 * callers of the remaining hooks live outside this file, so the notes below
 * are limited to what the signatures show.
 */
struct msm_gpu_funcs {
	/* Writes the queried value through @value; int return is a status. */
	int (*get_param)(struct msm_gpu *gpu, uint32_t param, uint64_t *value);
	int (*hw_init)(struct msm_gpu *gpu);
	int (*pm_suspend)(struct msm_gpu *gpu);
	int (*pm_resume)(struct msm_gpu *gpu);
	void (*submit)(struct msm_gpu *gpu, struct msm_gem_submit *submit,
			struct msm_file_private *ctx);
	void (*flush)(struct msm_gpu *gpu);
	/* Interrupt hook; the argument is the GPU, not an IRQ number. */
	irqreturn_t (*irq)(struct msm_gpu *gpu);
	/* Fence value that msm_gpu_active() compares with fctx->last_fence. */
	uint32_t (*last_fence)(struct msm_gpu *gpu);
	void (*recover)(struct msm_gpu *gpu);
	void (*destroy)(struct msm_gpu *gpu);
#ifdef CONFIG_DEBUG_FS
	/* show GPU status in debugfs: */
	void (*show)(struct msm_gpu *gpu, struct seq_file *m);
#endif
};

/*
 * Base GPU object shared by all backends (the "msm_gpu" root of the class
 * hierarchy sketched earlier in this header).
 *
 * The inline helpers in this header rely on three members being valid:
 * 'funcs' and 'fctx' (msm_gpu_active()) and 'mmio' (gpu_read()/gpu_write()
 * and their 64-bit variants).
 */
struct msm_gpu {
	const char *name;
	struct drm_device *dev;
	struct platform_device *pdev;
	/* backend operations table; msm_gpu_active() calls funcs->last_fence() */
	const struct msm_gpu_funcs *funcs;

	/* performance counters (hw & sw): */
	spinlock_t perf_lock;
	bool perfcntr_active;
	struct {
		bool active;
		ktime_t time;
	} last_sample;
	uint32_t totaltime, activetime;    /* sw counters */
	uint32_t last_cntrs[5];            /* hw counters */
	const struct msm_gpu_perfcntr *perfcntrs;
	/* NOTE(review): presumably the number of entries in perfcntrs -- confirm */
	uint32_t num_perfcntrs;

	/* ringbuffer: */
	struct msm_ringbuffer *rb;
	uint64_t rb_iova;

	/* list of GEM active objects: */
	struct list_head active_list;

	/*
	 * fencing: msm_gpu_active() compares fctx->last_fence against the
	 * value returned by funcs->last_fence()
	 */
	struct msm_fence_context *fctx;

	/* does gpu need hw_init? */
	bool needs_hw_init;

	/* worker for handling active-list retiring: */
	struct work_struct retire_work;

	/*
	 * Register base used by the gpu_read()/gpu_write() helpers; they
	 * address registers as 'mmio + (reg << 2)'.
	 */
	void __iomem *mmio;
	int irq;

	struct msm_gem_address_space *aspace;

	/* Power Control: */
	struct regulator *gpu_reg, *gpu_cx;
	struct clk **grp_clks;
	/* NOTE(review): presumably the number of entries in grp_clks -- confirm */
	int nr_clocks;
	struct clk *ebi1_clk, *core_clk, *rbbmtimer_clk;
	uint32_t fast_rate, bus_freq;

	/* These two members exist only when the macro below is defined: */
#ifdef DOWNSTREAM_CONFIG_MSM_BUS_SCALING
	struct msm_bus_scale_pdata *bus_scale_table;
	uint32_t bsc;
#endif

	/* Hang and Inactivity Detection:
	 *
	 * The macros below sit inside the struct body only for proximity to
	 * the fields they relate to; like any #define they are visible from
	 * this point to the end of the translation unit.
	 */
#define DRM_MSM_INACTIVE_PERIOD   66 /* in ms (roughly four frames) */

#define DRM_MSM_HANGCHECK_PERIOD 500 /* in ms */
#define DRM_MSM_HANGCHECK_JIFFIES msecs_to_jiffies(DRM_MSM_HANGCHECK_PERIOD)
	struct timer_list hangcheck_timer;
	uint32_t hangcheck_fence;
	struct work_struct recover_work;

	struct list_head submit_list;
};

/*
 * msm_gpu_active() - tell whether the fence context is ahead of the GPU.
 * @gpu: GPU to query; gpu->fctx and gpu->funcs->last_fence must be valid.
 *
 * Compares the last fence value recorded in the fence context with the value
 * reported by the backend's last_fence() hook.
 *
 * The values are 32-bit sequence numbers, so a plain '>' gives the wrong
 * answer once the counter wraps past zero.  Taking the signed 32-bit
 * difference instead keeps the ordering correct as long as the two values
 * are less than 2^31 apart (the same technique as time_after() for jiffies).
 *
 * NOTE(review): assumes fctx->last_fence is a 32-bit sequence number, like
 * the hook's uint32_t return value -- confirm against msm_fence.h.
 *
 * Return: true when fctx->last_fence is ahead of the backend's value.
 */
static inline bool msm_gpu_active(struct msm_gpu *gpu)
{
	uint32_t ctx_seqno = gpu->fctx->last_fence;
	uint32_t gpu_seqno = gpu->funcs->last_fence(gpu);

	return (int32_t)(ctx_seqno - gpu_seqno) > 0;
}

/* Perf-Counters:
 * The select_reg and select_val are just there for the benefit of the child
 * class that actually enables the perf counter, but the msm_gpu base class
 * will handle sampling/displaying the counters.
 */

/*
 * Description of one performance counter.  struct msm_gpu points at an
 * array of these through its 'perfcntrs' member.
 */
struct msm_gpu_perfcntr {
	uint32_t select_reg;	/* for the child class that enables the counter */
	uint32_t sample_reg;	/* NOTE(review): presumably the register sampled by the base class -- confirm */
	uint32_t select_val;	/* for the child class that enables the counter */
	const char *name;	/* NOTE(review): presumably the label used when displaying the counter -- confirm */
};

/*
 * gpu_write() - write a 32-bit value to a GPU register.
 * @gpu:  GPU whose 'mmio' base is used
 * @reg:  register index; scaled by 4 (reg << 2) to form the byte offset
 * @data: value to write
 */
static inline void gpu_write(struct msm_gpu *gpu, u32 reg, u32 data)
{
	const u32 byte_off = reg << 2;

	msm_writel(data, gpu->mmio + byte_off);
}

/*
 * gpu_read() - read a 32-bit GPU register.
 * @gpu: GPU whose 'mmio' base is used
 * @reg: register index; scaled by 4 (reg << 2) to form the byte offset
 *
 * Return: the value returned by msm_readl() for that address.
 */
static inline u32 gpu_read(struct msm_gpu *gpu, u32 reg)
{
	const u32 byte_off = reg << 2;

	return msm_readl(gpu->mmio + byte_off);
}

/*
 * gpu_rmw() - read-modify-write a 32-bit GPU register.
 * @gpu:  GPU whose register is updated
 * @reg:  register index, as taken by gpu_read()/gpu_write()
 * @mask: bits to clear from the current register value
 * @or:   bits OR-ed in after the clear
 *
 * Implemented as a gpu_read() followed by a gpu_write(); no locking is done
 * here.
 */
static inline void gpu_rmw(struct msm_gpu *gpu, u32 reg, u32 mask, u32 or)
{
	const u32 kept = gpu_read(gpu, reg) & ~mask;

	gpu_write(gpu, reg, kept | or);
}

/*
 * gpu_read64() - assemble a 64-bit value from a LO/HI register pair.
 * @gpu: GPU whose 'mmio' base is used
 * @lo:  register index of the low 32 bits
 * @hi:  register index of the high 32 bits
 *
 * Two 32-bit reads are used instead of a single readq() because many LO
 * registers are not quad-word aligned, and because the hardware has a
 * history of placing registers wherever they fit (especially in spins), so
 * a LO/HI pair cannot be assumed to be laid out suitably for one 64-bit
 * access.
 *
 * Return: (HI << 32) | LO.
 */
static inline u64 gpu_read64(struct msm_gpu *gpu, u32 lo, u32 hi)
{
	/*
	 * Order matters: on some pairs (perfcounters, for example) reading
	 * LO latches HI, so LO has to be read before HI.
	 */
	const u64 low = msm_readl(gpu->mmio + (lo << 2));
	const u64 high = msm_readl(gpu->mmio + (hi << 2));

	return low | (high << 32);
}

/*
 * gpu_write64() - write a 64-bit value to a LO/HI register pair.
 * @gpu: GPU whose 'mmio' base is used
 * @lo:  register index receiving the low 32 bits
 * @hi:  register index receiving the high 32 bits
 * @val: value to split across the pair
 *
 * Uses two 32-bit writes (LO first, then HI) rather than a writeq(), for the
 * same reasons gpu_read64() avoids readq().
 */
static inline void gpu_write64(struct msm_gpu *gpu, u32 lo, u32 hi, u64 val)
{
	const u32 lo_off = lo << 2;
	const u32 hi_off = hi << 2;

	msm_writel(lower_32_bits(val), gpu->mmio + lo_off);
	msm_writel(upper_32_bits(val), gpu->mmio + hi_off);
}

/*
 * Entry points of the msm_gpu base class.  Only the declarations are visible
 * in this header; the definitions live elsewhere.
 */

/* Power management; the int return is a status code. */
int msm_gpu_pm_suspend(struct msm_gpu *gpu);
int msm_gpu_pm_resume(struct msm_gpu *gpu);

/* NOTE(review): presumably tied to msm_gpu.needs_hw_init -- confirm in the definition */
int msm_gpu_hw_init(struct msm_gpu *gpu);

/*
 * Performance counter control and sampling.  The sample call returns its
 * results through the @activetime, @totaltime and @cntrs pointers.
 * NOTE(review): @ncntrs presumably bounds the @cntrs array -- confirm.
 */
void msm_gpu_perfcntr_start(struct msm_gpu *gpu);
void msm_gpu_perfcntr_stop(struct msm_gpu *gpu);
int msm_gpu_perfcntr_sample(struct msm_gpu *gpu, uint32_t *activetime,
		uint32_t *totaltime, uint32_t ncntrs, uint32_t *cntrs);

/* Retire and submit; msm_gpu_submit() takes the same arguments as the funcs->submit hook. */
void msm_gpu_retire(struct msm_gpu *gpu);
void msm_gpu_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit,
		struct msm_file_private *ctx);

/*
 * Initialise a caller-provided struct msm_gpu with its operations table,
 * name and configuration; msm_gpu_cleanup() is the matching teardown
 * declaration.
 */
int msm_gpu_init(struct drm_device *drm, struct platform_device *pdev,
		struct msm_gpu *gpu, const struct msm_gpu_funcs *funcs,
		const char *name, struct msm_gpu_config *config);

void msm_gpu_cleanup(struct msm_gpu *gpu);

/* Adreno backend entry points (the adreno_gpu branch of the hierarchy above). */
struct msm_gpu *adreno_load_gpu(struct drm_device *dev);
void __init adreno_register(void);
void __exit adreno_unregister(void);

#endif /* __MSM_GPU_H__ */
Commit	Line	Data
7198e6b0 RC	1	/*
	2	* Copyright (C) 2013 Red Hat
	3	* Author: Rob Clark <robdclark@gmail.com>
	4	*
	5	* This program is free software; you can redistribute it and/or modify it
	6	* under the terms of the GNU General Public License version 2 as published by
	7	* the Free Software Foundation.
	8	*
	9	* This program is distributed in the hope that it will be useful, but WITHOUT
	10	* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
	11	* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
	12	* more details.
	13	*
	14	* You should have received a copy of the GNU General Public License along with
	15	* this program. If not, see <http://www.gnu.org/licenses/>.
	16	*/
	17
	18	#ifndef __MSM_GPU_H__
	19	#define __MSM_GPU_H__
	20
	21	#include <linux/clk.h>
	22	#include <linux/regulator/consumer.h>
	23
	24	#include "msm_drv.h"
ca762a8a	25	#include "msm_fence.h"
7198e6b0 RC	26	#include "msm_ringbuffer.h"
	27
	28	struct msm_gem_submit;
70c70f09	29	struct msm_gpu_perfcntr;
7198e6b0	30
5770fc7a JC	31	struct msm_gpu_config {
	32	const char *ioname;
	33	const char *irqname;
	34	uint64_t va_start;
	35	uint64_t va_end;
	36	unsigned int ringsz;
	37	};
	38
7198e6b0 RC	39	/* So far, with hardware that I've seen to date, we can have:
	40	* + zero, one, or two z180 2d cores
	41	* + a3xx or a2xx 3d core, which share a common CP (the firmware
	42	* for the CP seems to implement some different PM4 packet types
	43	* but the basics of cmdstream submission are the same)
	44	*
	45	* Which means that the eventual complete "class" hierarchy, once
	46	* support for all past and present hw is in place, becomes:
	47	* + msm_gpu
	48	* + adreno_gpu
	49	* + a3xx_gpu
	50	* + a2xx_gpu
	51	* + z180_gpu
	52	*/
	53	struct msm_gpu_funcs {
	54	int (get_param)(struct msm_gpu gpu, uint32_t param, uint64_t *value);
	55	int (hw_init)(struct msm_gpu gpu);
	56	int (pm_suspend)(struct msm_gpu gpu);
	57	int (pm_resume)(struct msm_gpu gpu);
1193c3bc	58	void (submit)(struct msm_gpu gpu, struct msm_gem_submit *submit,
7198e6b0 RC	59	struct msm_file_private *ctx);
7198e6b0 RC	60	void (flush)(struct msm_gpu gpu);
7198e6b0 RC	61	irqreturn_t (irq)(struct msm_gpu irq);
7198e6b0 RC	62	uint32_t (last_fence)(struct msm_gpu gpu);
bd6f82d8	63	void (recover)(struct msm_gpu gpu);
7198e6b0 RC	64	void (destroy)(struct msm_gpu gpu);
	65	#ifdef CONFIG_DEBUG_FS
	66	/* show GPU status in debugfs: */
	67	void (show)(struct msm_gpu gpu, struct seq_file *m);
	68	#endif
	69	};
	70
	71	struct msm_gpu {
	72	const char *name;
	73	struct drm_device *dev;
eeb75474	74	struct platform_device *pdev;
7198e6b0 RC	75	const struct msm_gpu_funcs *funcs;
7198e6b0 RC	76
70c70f09 RC	77	/* performance counters (hw & sw): */
	78	spinlock_t perf_lock;
	79	bool perfcntr_active;
	80	struct {
	81	bool active;
	82	ktime_t time;
	83	} last_sample;
	84	uint32_t totaltime, activetime; /* sw counters */
	85	uint32_t last_cntrs[5]; /* hw counters */
	86	const struct msm_gpu_perfcntr *perfcntrs;
	87	uint32_t num_perfcntrs;
	88
ca762a8a	89	/* ringbuffer: */
7198e6b0	90	struct msm_ringbuffer *rb;
78babc16	91	uint64_t rb_iova;
7198e6b0 RC	92
	93	/* list of GEM active objects: */
	94	struct list_head active_list;
	95
ca762a8a RC	96	/* fencing: */
ca762a8a RC	97	struct msm_fence_context *fctx;
bd6f82d8	98
eeb75474 RC	99	/* does gpu need hw_init? */
eeb75474 RC	100	bool needs_hw_init;
37d77c3a	101
7198e6b0 RC	102	/* worker for handling active-list retiring: */
	103	struct work_struct retire_work;
	104
	105	void __iomem *mmio;
	106	int irq;
	107
667ce33e	108	struct msm_gem_address_space *aspace;
7198e6b0 RC	109
	110	/* Power Control: */
	111	struct regulator gpu_reg, gpu_cx;
98db803f JC	112	struct clk **grp_clks;
	113	int nr_clocks;
	114	struct clk ebi1_clk, core_clk, *rbbmtimer_clk;
bf5af4ae	115	uint32_t fast_rate, bus_freq;
bf2b33af	116
6490ad47	117	#ifdef DOWNSTREAM_CONFIG_MSM_BUS_SCALING
bf2b33af	118	struct msm_bus_scale_pdata *bus_scale_table;
7198e6b0	119	uint32_t bsc;
bf2b33af	120	#endif
bd6f82d8	121
37d77c3a RC	122	/* Hang and Inactivity Detection:
	123	*/
	124	#define DRM_MSM_INACTIVE_PERIOD 66 /* in ms (roughly four frames) */
eeb75474	125
bd6f82d8 RC	126	#define DRM_MSM_HANGCHECK_PERIOD 500 /* in ms */
	127	#define DRM_MSM_HANGCHECK_JIFFIES msecs_to_jiffies(DRM_MSM_HANGCHECK_PERIOD)
	128	struct timer_list hangcheck_timer;
	129	uint32_t hangcheck_fence;
	130	struct work_struct recover_work;
1a370be9 RC	131
1a370be9 RC	132	struct list_head submit_list;
7198e6b0 RC	133	};
7198e6b0 RC	134
37d77c3a RC	135	static inline bool msm_gpu_active(struct msm_gpu *gpu)
37d77c3a RC	136	{
ca762a8a	137	return gpu->fctx->last_fence > gpu->funcs->last_fence(gpu);
37d77c3a RC	138	}
37d77c3a RC	139
70c70f09 RC	140	/* Perf-Counters:
	141	* The select_reg and select_val are just there for the benefit of the child
	142	* class that actually enables the perf counter.. but msm_gpu base class
	143	* will handle sampling/displaying the counters.
	144	*/
	145
	146	struct msm_gpu_perfcntr {
	147	uint32_t select_reg;
	148	uint32_t sample_reg;
	149	uint32_t select_val;
	150	const char *name;
	151	};
	152
7198e6b0 RC	153	static inline void gpu_write(struct msm_gpu *gpu, u32 reg, u32 data)
	154	{
	155	msm_writel(data, gpu->mmio + (reg << 2));
	156	}
	157
	158	static inline u32 gpu_read(struct msm_gpu *gpu, u32 reg)
	159	{
	160	return msm_readl(gpu->mmio + (reg << 2));
	161	}
	162
ae53a829 JC	163	static inline void gpu_rmw(struct msm_gpu *gpu, u32 reg, u32 mask, u32 or)
	164	{
	165	uint32_t val = gpu_read(gpu, reg);
	166
	167	val &= ~mask;
	168	gpu_write(gpu, reg, val \| or);
	169	}
	170
	171	static inline u64 gpu_read64(struct msm_gpu *gpu, u32 lo, u32 hi)
	172	{
	173	u64 val;
	174
	175	/*
	176	* Why not a readq here? Two reasons: 1) many of the LO registers are
	177	* not quad word aligned and 2) the GPU hardware designers have a bit
	178	* of a history of putting registers where they fit, especially in
	179	* spins. The longer a GPU family goes the higher the chance that
	180	* we'll get burned. We could do a series of validity checks if we
	181	* wanted to, but really is a readq() that much better? Nah.
	182	*/
	183
	184	/*
	185	* For some lo/hi registers (like perfcounters), the hi value is latched
	186	* when the lo is read, so make sure to read the lo first to trigger
	187	* that
	188	*/
	189	val = (u64) msm_readl(gpu->mmio + (lo << 2));
	190	val \|= ((u64) msm_readl(gpu->mmio + (hi << 2)) << 32);
	191
	192	return val;
	193	}
	194
	195	static inline void gpu_write64(struct msm_gpu *gpu, u32 lo, u32 hi, u64 val)
	196	{
	197	/* Why not a writeq here? Read the screed above */
	198	msm_writel(lower_32_bits(val), gpu->mmio + (lo << 2));
	199	msm_writel(upper_32_bits(val), gpu->mmio + (hi << 2));
	200	}
	201
7198e6b0 RC	202	int msm_gpu_pm_suspend(struct msm_gpu *gpu);
	203	int msm_gpu_pm_resume(struct msm_gpu *gpu);
	204
eeb75474 RC	205	int msm_gpu_hw_init(struct msm_gpu *gpu);
eeb75474 RC	206
70c70f09 RC	207	void msm_gpu_perfcntr_start(struct msm_gpu *gpu);
	208	void msm_gpu_perfcntr_stop(struct msm_gpu *gpu);
	209	int msm_gpu_perfcntr_sample(struct msm_gpu gpu, uint32_t activetime,
	210	uint32_t totaltime, uint32_t ncntrs, uint32_t cntrs);
	211
7198e6b0	212	void msm_gpu_retire(struct msm_gpu *gpu);
f44d32c7	213	void msm_gpu_submit(struct msm_gpu gpu, struct msm_gem_submit submit,
7198e6b0 RC	214	struct msm_file_private *ctx);
	215
	216	int msm_gpu_init(struct drm_device drm, struct platform_device pdev,
	217	struct msm_gpu gpu, const struct msm_gpu_funcs funcs,
5770fc7a JC	218	const char name, struct msm_gpu_config config);
5770fc7a JC	219
7198e6b0 RC	220	void msm_gpu_cleanup(struct msm_gpu *gpu);
7198e6b0 RC	221
e2550b7a	222	struct msm_gpu adreno_load_gpu(struct drm_device dev);
bfd28b13 RC	223	void __init adreno_register(void);
bfd28b13 RC	224	void __exit adreno_unregister(void);
7198e6b0 RC	225
7198e6b0 RC	226	#endif /* __MSM_GPU_H__ */