Commit | Line | Data |
---|---|---|
caab277b | 1 | /* SPDX-License-Identifier: GPL-2.0-only */ |
7198e6b0 RC |
2 | /* |
3 | * Copyright (C) 2013 Red Hat | |
4 | * Author: Rob Clark <robdclark@gmail.com> | |
7198e6b0 RC |
5 | */ |
6 | ||
7 | #ifndef __MSM_GPU_H__ | |
8 | #define __MSM_GPU_H__ | |
9 | ||
10 | #include <linux/clk.h> | |
fcf9d0b7 | 11 | #include <linux/interconnect.h> |
1f60d114 | 12 | #include <linux/pm_opp.h> |
7198e6b0 RC |
13 | #include <linux/regulator/consumer.h> |
14 | ||
15 | #include "msm_drv.h" | |
ca762a8a | 16 | #include "msm_fence.h" |
7198e6b0 | 17 | #include "msm_ringbuffer.h" |
604234f3 | 18 | #include "msm_gem.h" |
7198e6b0 RC |
19 | |
20 | struct msm_gem_submit; | |
70c70f09 | 21 | struct msm_gpu_perfcntr; |
e00e473d | 22 | struct msm_gpu_state; |
7198e6b0 | 23 | |
5770fc7a JC |
24 | struct msm_gpu_config { |
25 | const char *ioname; | |
f97decac | 26 | unsigned int nr_rings; |
5770fc7a JC |
27 | }; |
28 | ||
7198e6b0 RC |
29 | /* So far, with hardware that I've seen to date, we can have: |
30 | * + zero, one, or two z180 2d cores | |
31 | * + a3xx or a2xx 3d core, which share a common CP (the firmware | |
32 | * for the CP seems to implement some different PM4 packet types | |
33 | * but the basics of cmdstream submission are the same) | |
34 | * | |
35 | * Which means that the eventual complete "class" hierarchy, once | |
36 | * support for all past and present hw is in place, becomes: | |
37 | * + msm_gpu | |
38 | * + adreno_gpu | |
39 | * + a3xx_gpu | |
40 | * + a2xx_gpu | |
41 | * + z180_gpu | |
42 | */ | |
43 | struct msm_gpu_funcs { | |
44 | int (*get_param)(struct msm_gpu *gpu, uint32_t param, uint64_t *value); | |
45 | int (*hw_init)(struct msm_gpu *gpu); | |
46 | int (*pm_suspend)(struct msm_gpu *gpu); | |
47 | int (*pm_resume)(struct msm_gpu *gpu); | |
1193c3bc | 48 | void (*submit)(struct msm_gpu *gpu, struct msm_gem_submit *submit, |
7198e6b0 | 49 | struct msm_file_private *ctx); |
f97decac | 50 | void (*flush)(struct msm_gpu *gpu, struct msm_ringbuffer *ring); |
7198e6b0 | 51 | irqreturn_t (*irq)(struct msm_gpu *irq); |
f97decac | 52 | struct msm_ringbuffer *(*active_ring)(struct msm_gpu *gpu); |
bd6f82d8 | 53 | void (*recover)(struct msm_gpu *gpu); |
7198e6b0 | 54 | void (*destroy)(struct msm_gpu *gpu); |
c878a628 | 55 | #if defined(CONFIG_DEBUG_FS) || defined(CONFIG_DEV_COREDUMP) |
7198e6b0 | 56 | /* show GPU status in debugfs: */ |
4f776f45 | 57 | void (*show)(struct msm_gpu *gpu, struct msm_gpu_state *state, |
c0fec7f5 | 58 | struct drm_printer *p); |
331dc0bc | 59 | /* for generation specific debugfs: */ |
7ce84471 | 60 | void (*debugfs_init)(struct msm_gpu *gpu, struct drm_minor *minor); |
7198e6b0 | 61 | #endif |
de0a3d09 | 62 | unsigned long (*gpu_busy)(struct msm_gpu *gpu); |
e00e473d | 63 | struct msm_gpu_state *(*gpu_state_get)(struct msm_gpu *gpu); |
c0fec7f5 | 64 | int (*gpu_state_put)(struct msm_gpu_state *state); |
de0a3d09 | 65 | unsigned long (*gpu_get_freq)(struct msm_gpu *gpu); |
1f60d114 | 66 | void (*gpu_set_freq)(struct msm_gpu *gpu, struct dev_pm_opp *opp); |
ccac7ce3 JC |
67 | struct msm_gem_address_space *(*create_address_space) |
68 | (struct msm_gpu *gpu, struct platform_device *pdev); | |
7198e6b0 RC |
69 | }; |
70 | ||
/* Base "class" shared by all GPU generations; embedded in the
 * generation-specific struct (e.g. adreno_gpu).
 */
struct msm_gpu {
	const char *name;
	struct drm_device *dev;
	struct platform_device *pdev;
	const struct msm_gpu_funcs *funcs;

	/* performance counters (hw & sw): */
	spinlock_t perf_lock;
	bool perfcntr_active;
	struct {
		bool active;
		ktime_t time;
	} last_sample;
	uint32_t totaltime, activetime;    /* sw counters */
	uint32_t last_cntrs[5];            /* hw counters */
	const struct msm_gpu_perfcntr *perfcntrs;
	uint32_t num_perfcntrs;

	struct msm_ringbuffer *rb[MSM_GPU_MAX_RINGS];
	int nr_rings;

	/* list of GEM active objects: */
	struct list_head active_list;

	/* does gpu need hw_init? */
	bool needs_hw_init;

	/* number of GPU hangs (for all contexts) */
	int global_faults;

	/* worker for handling active-list retiring: */
	struct work_struct retire_work;

	void __iomem *mmio;
	int irq;

	struct msm_gem_address_space *aspace;

	/* Power Control: */
	struct regulator *gpu_reg, *gpu_cx;
	struct clk_bulk_data *grp_clks;
	int nr_clocks;
	struct clk *ebi1_clk, *core_clk, *rbbmtimer_clk;
	uint32_t fast_rate;

	/* The gfx-mem interconnect path that's used by all GPU types. */
	struct icc_path *icc_path;

	/*
	 * Second interconnect path for some A3xx and all A4xx GPUs to the
	 * On Chip MEMory (OCMEM).
	 */
	struct icc_path *ocmem_icc_path;

	/* Hang and Inactivity Detection:
	 */
#define DRM_MSM_INACTIVE_PERIOD   66 /* in ms (roughly four frames) */

#define DRM_MSM_HANGCHECK_PERIOD 500 /* in ms */
#define DRM_MSM_HANGCHECK_JIFFIES msecs_to_jiffies(DRM_MSM_HANGCHECK_PERIOD)
	struct timer_list hangcheck_timer;
	struct work_struct recover_work;

	struct drm_gem_object *memptrs_bo;

	/* devfreq state: busy_cycles/time snapshot the last sample point
	 * used to compute relative busyness.
	 */
	struct {
		struct devfreq *devfreq;
		u64 busy_cycles;
		ktime_t time;
	} devfreq;

	/* last captured crash dump; refcounted, guarded by dev->struct_mutex
	 * (see msm_gpu_crashstate_get/put below)
	 */
	struct msm_gpu_state *crashstate;

	/* True if the hardware supports expanded apriv (a650 and newer) */
	bool hw_apriv;
};
146 | ||
f97decac JC |
147 | /* It turns out that all targets use the same ringbuffer size */ |
148 | #define MSM_GPU_RINGBUFFER_SZ SZ_32K | |
4d87fc32 JC |
149 | #define MSM_GPU_RINGBUFFER_BLKSIZE 32 |
150 | ||
151 | #define MSM_GPU_RB_CNTL_DEFAULT \ | |
152 | (AXXX_CP_RB_CNTL_BUFSZ(ilog2(MSM_GPU_RINGBUFFER_SZ / 8)) | \ | |
153 | AXXX_CP_RB_CNTL_BLKSZ(ilog2(MSM_GPU_RINGBUFFER_BLKSIZE / 8))) | |
f97decac | 154 | |
37d77c3a RC |
155 | static inline bool msm_gpu_active(struct msm_gpu *gpu) |
156 | { | |
f97decac JC |
157 | int i; |
158 | ||
159 | for (i = 0; i < gpu->nr_rings; i++) { | |
160 | struct msm_ringbuffer *ring = gpu->rb[i]; | |
161 | ||
162 | if (ring->seqno > ring->memptrs->fence) | |
163 | return true; | |
164 | } | |
165 | ||
166 | return false; | |
37d77c3a RC |
167 | } |
168 | ||
70c70f09 RC |
169 | /* Perf-Counters: |
170 | * The select_reg and select_val are just there for the benefit of the child | |
171 | * class that actually enables the perf counter.. but msm_gpu base class | |
172 | * will handle sampling/displaying the counters. | |
173 | */ | |
174 | ||
175 | struct msm_gpu_perfcntr { | |
176 | uint32_t select_reg; | |
177 | uint32_t sample_reg; | |
178 | uint32_t select_val; | |
179 | const char *name; | |
180 | }; | |
181 | ||
f7de1545 JC |
182 | struct msm_gpu_submitqueue { |
183 | int id; | |
184 | u32 flags; | |
185 | u32 prio; | |
186 | int faults; | |
187 | struct list_head node; | |
188 | struct kref ref; | |
189 | }; | |
190 | ||
cdb95931 JC |
191 | struct msm_gpu_state_bo { |
192 | u64 iova; | |
193 | size_t size; | |
194 | void *data; | |
1df4289d | 195 | bool encoded; |
cdb95931 JC |
196 | }; |
197 | ||
/* Captured GPU state for crash dumps / debugfs; refcounted (see
 * msm_gpu_crashstate_get/put and funcs->gpu_state_put).
 */
struct msm_gpu_state {
	struct kref ref;
	struct timespec64 time;		/* when the state was captured */

	/* per-ring snapshot at capture time: */
	struct {
		u64 iova;
		u32 fence;		/* last retired fence */
		u32 seqno;		/* last submitted seqno */
		u32 rptr;
		u32 wptr;
		void *data;		/* ringbuffer contents, if captured */
		int data_size;
		bool encoded;		/* true if 'data' is stored encoded */
	} ring[MSM_GPU_MAX_RINGS];

	int nr_registers;
	u32 *registers;			/* offset/value pairs — TODO confirm layout against the dump code */

	u32 rbbm_status;

	char *comm;			/* command/process names of the hanging context */
	char *cmd;

	int nr_bos;
	struct msm_gpu_state_bo *bos;
};
224 | ||
7198e6b0 RC |
225 | static inline void gpu_write(struct msm_gpu *gpu, u32 reg, u32 data) |
226 | { | |
227 | msm_writel(data, gpu->mmio + (reg << 2)); | |
228 | } | |
229 | ||
230 | static inline u32 gpu_read(struct msm_gpu *gpu, u32 reg) | |
231 | { | |
232 | return msm_readl(gpu->mmio + (reg << 2)); | |
233 | } | |
234 | ||
ae53a829 JC |
235 | static inline void gpu_rmw(struct msm_gpu *gpu, u32 reg, u32 mask, u32 or) |
236 | { | |
237 | uint32_t val = gpu_read(gpu, reg); | |
238 | ||
239 | val &= ~mask; | |
240 | gpu_write(gpu, reg, val | or); | |
241 | } | |
242 | ||
/* Read a 64-bit value from a lo/hi register pair. */
static inline u64 gpu_read64(struct msm_gpu *gpu, u32 lo, u32 hi)
{
	u64 val;

	/*
	 * Why not a readq here? Two reasons: 1) many of the LO registers are
	 * not quad word aligned and 2) the GPU hardware designers have a bit
	 * of a history of putting registers where they fit, especially in
	 * spins. The longer a GPU family goes the higher the chance that
	 * we'll get burned. We could do a series of validity checks if we
	 * wanted to, but really is a readq() that much better? Nah.
	 */

	/*
	 * For some lo/hi registers (like perfcounters), the hi value is latched
	 * when the lo is read, so make sure to read the lo first to trigger
	 * that
	 */
	val = (u64) msm_readl(gpu->mmio + (lo << 2));
	val |= ((u64) msm_readl(gpu->mmio + (hi << 2)) << 32);

	return val;
}
266 | ||
267 | static inline void gpu_write64(struct msm_gpu *gpu, u32 lo, u32 hi, u64 val) | |
268 | { | |
269 | /* Why not a writeq here? Read the screed above */ | |
270 | msm_writel(lower_32_bits(val), gpu->mmio + (lo << 2)); | |
271 | msm_writel(upper_32_bits(val), gpu->mmio + (hi << 2)); | |
272 | } | |
273 | ||
7198e6b0 RC |
274 | int msm_gpu_pm_suspend(struct msm_gpu *gpu); |
275 | int msm_gpu_pm_resume(struct msm_gpu *gpu); | |
de0a3d09 | 276 | void msm_gpu_resume_devfreq(struct msm_gpu *gpu); |
7198e6b0 | 277 | |
eeb75474 RC |
278 | int msm_gpu_hw_init(struct msm_gpu *gpu); |
279 | ||
70c70f09 RC |
280 | void msm_gpu_perfcntr_start(struct msm_gpu *gpu); |
281 | void msm_gpu_perfcntr_stop(struct msm_gpu *gpu); | |
282 | int msm_gpu_perfcntr_sample(struct msm_gpu *gpu, uint32_t *activetime, | |
283 | uint32_t *totaltime, uint32_t ncntrs, uint32_t *cntrs); | |
284 | ||
7198e6b0 | 285 | void msm_gpu_retire(struct msm_gpu *gpu); |
f44d32c7 | 286 | void msm_gpu_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit, |
7198e6b0 RC |
287 | struct msm_file_private *ctx); |
288 | ||
289 | int msm_gpu_init(struct drm_device *drm, struct platform_device *pdev, | |
290 | struct msm_gpu *gpu, const struct msm_gpu_funcs *funcs, | |
5770fc7a JC |
291 | const char *name, struct msm_gpu_config *config); |
292 | ||
7198e6b0 RC |
293 | void msm_gpu_cleanup(struct msm_gpu *gpu); |
294 | ||
e2550b7a | 295 | struct msm_gpu *adreno_load_gpu(struct drm_device *dev); |
bfd28b13 RC |
296 | void __init adreno_register(void); |
297 | void __exit adreno_unregister(void); | |
7198e6b0 | 298 | |
f7de1545 JC |
299 | static inline void msm_submitqueue_put(struct msm_gpu_submitqueue *queue) |
300 | { | |
301 | if (queue) | |
302 | kref_put(&queue->ref, msm_submitqueue_destroy); | |
303 | } | |
304 | ||
c0fec7f5 JC |
305 | static inline struct msm_gpu_state *msm_gpu_crashstate_get(struct msm_gpu *gpu) |
306 | { | |
307 | struct msm_gpu_state *state = NULL; | |
308 | ||
309 | mutex_lock(&gpu->dev->struct_mutex); | |
310 | ||
311 | if (gpu->crashstate) { | |
312 | kref_get(&gpu->crashstate->ref); | |
313 | state = gpu->crashstate; | |
314 | } | |
315 | ||
316 | mutex_unlock(&gpu->dev->struct_mutex); | |
317 | ||
318 | return state; | |
319 | } | |
320 | ||
321 | static inline void msm_gpu_crashstate_put(struct msm_gpu *gpu) | |
322 | { | |
323 | mutex_lock(&gpu->dev->struct_mutex); | |
324 | ||
325 | if (gpu->crashstate) { | |
326 | if (gpu->funcs->gpu_state_put(gpu->crashstate)) | |
327 | gpu->crashstate = NULL; | |
328 | } | |
329 | ||
330 | mutex_unlock(&gpu->dev->struct_mutex); | |
331 | } | |
332 | ||
604234f3 JC |
333 | /* |
334 | * Simple macro to semi-cleanly add the MAP_PRIV flag for targets that can | |
335 | * support expanded privileges | |
336 | */ | |
337 | #define check_apriv(gpu, flags) \ | |
338 | (((gpu)->hw_apriv ? MSM_BO_MAP_PRIV : 0) | (flags)) | |
339 | ||
340 | ||
7198e6b0 | 341 | #endif /* __MSM_GPU_H__ */ |