// SPDX-License-Identifier: GPL-2.0+
/* Copyright (C) 2014-2018 Broadcom */

#include <linux/device.h>
#include <linux/dma-mapping.h>
#include <linux/io.h>
#include <linux/module.h>
#include <linux/platform_device.h>
#include <linux/reset.h>
#include <linux/sched/signal.h>
#include <linux/uaccess.h>

#include <drm/drm_managed.h>

#include "v3d_drv.h"
#include "v3d_regs.h"
#include "v3d_trace.h"

static void
v3d_init_core(struct v3d_dev *v3d, int core)
{
        /* Set OVRTMUOUT, which means that the texture sampler uniform
         * configuration's tmu output type field is used, instead of
         * using the hardware default behavior based on the texture
         * type. If you want the default behavior, you can still put
         * "2" in the indirect texture state's output_type field.
         */
        if (v3d->ver < 40)
                V3D_CORE_WRITE(core, V3D_CTL_MISCCFG, V3D_MISCCFG_OVRTMUOUT);

        /* Whenever we flush the L2T cache, we always want to flush
         * the whole thing.
         */
        V3D_CORE_WRITE(core, V3D_CTL_L2TFLSTA, 0);
        V3D_CORE_WRITE(core, V3D_CTL_L2TFLEND, ~0);
}

/* Sets invariant state for the HW. */
static void
v3d_init_hw_state(struct v3d_dev *v3d)
{
        v3d_init_core(v3d, 0);
}

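/* Asks the GMP to stop accepting new AXI transactions and then waits
 * for outstanding reads and writes to drain. (Our reading of the
 * register names: the GMP is the memory-protection block sitting on
 * the core's AXI path.)
 */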
static void
v3d_idle_axi(struct v3d_dev *v3d, int core)
{
        V3D_CORE_WRITE(core, V3D_GMP_CFG(v3d->ver), V3D_GMP_CFG_STOP_REQ);

        if (wait_for((V3D_CORE_READ(core, V3D_GMP_STATUS(v3d->ver)) &
                      (V3D_GMP_STATUS_RD_COUNT_MASK |
                       V3D_GMP_STATUS_WR_COUNT_MASK |
                       V3D_GMP_STATUS_CFG_BUSY)) == 0, 100)) {
                DRM_ERROR("Failed to wait for safe GMP shutdown\n");
        }
}

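/* Requests a safe shutdown of the GCA and waits for it to acknowledge.
 * V3D 4.1+ has no GCA, hence the early return. (Judging from the
 * register usage in v3d_flush_l3(), the GCA is the block fronting the
 * L3 cache on older parts.)
 */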
static void
v3d_idle_gca(struct v3d_dev *v3d)
{
        if (v3d->ver >= 41)
                return;

        V3D_GCA_WRITE(V3D_GCA_SAFE_SHUTDOWN, V3D_GCA_SAFE_SHUTDOWN_EN);

        if (wait_for((V3D_GCA_READ(V3D_GCA_SAFE_SHUTDOWN_ACK) &
                      V3D_GCA_SAFE_SHUTDOWN_ACK_ACKED) ==
                     V3D_GCA_SAFE_SHUTDOWN_ACK_ACKED, 100)) {
                DRM_ERROR("Failed to wait for safe GCA shutdown\n");
        }
}

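/* Resets V3D by pulsing the SW_INIT line in the GR bridge, used when
 * no reset controller is wired up in the device tree. The bridge's
 * major revision selects which SW_INIT register to poke.
 */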
static void
v3d_reset_by_bridge(struct v3d_dev *v3d)
{
        int version = V3D_BRIDGE_READ(V3D_TOP_GR_BRIDGE_REVISION);

        if (V3D_GET_FIELD(version, V3D_TOP_GR_BRIDGE_MAJOR) == 2) {
                V3D_BRIDGE_WRITE(V3D_TOP_GR_BRIDGE_SW_INIT_0,
                                 V3D_TOP_GR_BRIDGE_SW_INIT_0_V3D_CLK_108_SW_INIT);
                V3D_BRIDGE_WRITE(V3D_TOP_GR_BRIDGE_SW_INIT_0, 0);

                /* GFXH-1383: The SW_INIT may cause a stray write to address 0
                 * of the unit, so reset it to its power-on value here.
                 */
                V3D_WRITE(V3D_HUB_AXICFG, V3D_HUB_AXICFG_MAX_LEN_MASK);
        } else {
                WARN_ON_ONCE(V3D_GET_FIELD(version,
                                           V3D_TOP_GR_BRIDGE_MAJOR) != 7);
                V3D_BRIDGE_WRITE(V3D_TOP_GR_BRIDGE_SW_INIT_1,
                                 V3D_TOP_GR_BRIDGE_SW_INIT_1_V3D_CLK_108_SW_INIT);
                V3D_BRIDGE_WRITE(V3D_TOP_GR_BRIDGE_SW_INIT_1, 0);
        }
}

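/* Fully resets the V3D core, preferring the reset controller from the
 * device tree and falling back to the GR bridge, then restores the
 * invariant hardware state that the reset wiped.
 */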
static void
v3d_reset_v3d(struct v3d_dev *v3d)
{
        if (v3d->reset)
                reset_control_reset(v3d->reset);
        else
                v3d_reset_by_bridge(v3d);

        v3d_init_hw_state(v3d);
}

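/* Resets the GPU in response to a hang. Everything the reset clears
 * must be reestablished afterwards: the MMU page table pointer and the
 * interrupt setup. Any active perfmon is stopped, since its counter
 * values would be meaningless across a reset.
 */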
void
v3d_reset(struct v3d_dev *v3d)
{
        struct drm_device *dev = &v3d->drm;

        DRM_DEV_ERROR(dev->dev, "Resetting GPU for hang.\n");
        DRM_DEV_ERROR(dev->dev, "V3D_ERR_STAT: 0x%08x\n",
                      V3D_CORE_READ(0, V3D_ERR_STAT));
        trace_v3d_reset_begin(dev);

        /* XXX: only needed for safe powerdown, not reset. */
        if (false)
                v3d_idle_axi(v3d, 0);

        v3d_idle_gca(v3d);
        v3d_reset_v3d(v3d);

        v3d_mmu_set_page_table(v3d);
        v3d_irq_reset(v3d);

        v3d_perfmon_stop(v3d, v3d->active_perfmon, false);

        trace_v3d_reset_end(dev);
}

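/* Flushes the L3 cache through the GCA on parts that have one
 * (pre-4.1). Our reading of the version check: on V3D 3.3+ the flush
 * bit self-clears, while older parts need it cleared by hand.
 */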
static void
v3d_flush_l3(struct v3d_dev *v3d)
{
        if (v3d->ver < 41) {
                u32 gca_ctrl = V3D_GCA_READ(V3D_GCA_CACHE_CTRL);

                V3D_GCA_WRITE(V3D_GCA_CACHE_CTRL,
                              gca_ctrl | V3D_GCA_CACHE_CTRL_FLUSH);

                if (v3d->ver < 33) {
                        V3D_GCA_WRITE(V3D_GCA_CACHE_CTRL,
                                      gca_ctrl & ~V3D_GCA_CACHE_CTRL_FLUSH);
                }
        }
}

/* Invalidates the (read-only) L2C cache. This was the L2 cache for
 * uniforms and instructions on V3D 3.2.
 */
static void
v3d_invalidate_l2c(struct v3d_dev *v3d, int core)
{
        if (v3d->ver > 32)
                return;

        V3D_CORE_WRITE(core, V3D_CTL_L2CACTL,
                       V3D_L2CACTL_L2CCLR |
                       V3D_L2CACTL_L2CENA);
}

/* Invalidates texture L2 cachelines */
static void
v3d_flush_l2t(struct v3d_dev *v3d, int core)
{
        /* While there is a busy bit (V3D_L2TCACTL_L2TFLS), we don't
         * need to wait for completion before dispatching the job --
         * L2T accesses will be stalled until the flush has completed.
         * However, we do need to make sure we don't try to trigger a
         * new flush while the L2_CLEAN queue is trying to
         * synchronously clean after a job.
         */
        mutex_lock(&v3d->cache_clean_lock);
        V3D_CORE_WRITE(core, V3D_CTL_L2TCACTL,
                       V3D_L2TCACTL_L2TFLS |
                       V3D_SET_FIELD(V3D_L2TCACTL_FLM_FLUSH, V3D_L2TCACTL_FLM));
        mutex_unlock(&v3d->cache_clean_lock);
}

/* Cleans texture L1 and L2 cachelines (writing back dirty data).
 *
 * For cleaning, which happens from the CACHE_CLEAN queue after CSD has
 * executed, we need to make sure that the clean is done before
 * signaling job completion. So, we synchronously wait before
 * returning, and we make sure that L2 invalidates don't happen in the
 * meantime to confuse our are-we-done checks.
 */
void
v3d_clean_caches(struct v3d_dev *v3d)
{
        struct drm_device *dev = &v3d->drm;
        int core = 0;

        trace_v3d_cache_clean_begin(dev);

        V3D_CORE_WRITE(core, V3D_CTL_L2TCACTL, V3D_L2TCACTL_TMUWCF);
        if (wait_for(!(V3D_CORE_READ(core, V3D_CTL_L2TCACTL) &
                       V3D_L2TCACTL_TMUWCF), 100)) {
                DRM_ERROR("Timeout waiting for TMU write combiner flush\n");
        }

        mutex_lock(&v3d->cache_clean_lock);
        V3D_CORE_WRITE(core, V3D_CTL_L2TCACTL,
                       V3D_L2TCACTL_L2TFLS |
                       V3D_SET_FIELD(V3D_L2TCACTL_FLM_CLEAN, V3D_L2TCACTL_FLM));

        if (wait_for(!(V3D_CORE_READ(core, V3D_CTL_L2TCACTL) &
                       V3D_L2TCACTL_L2TFLS), 100)) {
                DRM_ERROR("Timeout waiting for L2T clean\n");
        }

        mutex_unlock(&v3d->cache_clean_lock);

        trace_v3d_cache_clean_end(dev);
}

/* Invalidates the slice caches. These are read-only caches. */
static void
v3d_invalidate_slices(struct v3d_dev *v3d, int core)
{
        V3D_CORE_WRITE(core, V3D_CTL_SLCACTL,
                       V3D_SET_FIELD(0xf, V3D_SLCACTL_TVCCS) |
                       V3D_SET_FIELD(0xf, V3D_SLCACTL_TDCCS) |
                       V3D_SET_FIELD(0xf, V3D_SLCACTL_UCC) |
                       V3D_SET_FIELD(0xf, V3D_SLCACTL_ICC));
}

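/* Invalidates all of the GPU's read-side caches so a job doesn't
 * observe stale data written by an earlier one; intended to run
 * before a job starts executing.
 */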
void
v3d_invalidate_caches(struct v3d_dev *v3d)
{
        /* Invalidate the caches from the outside in. That way if
         * another CL's concurrent use of nearby memory were to pull
         * an invalidated cacheline back in, we wouldn't leave stale
         * data in the inner cache.
         */
        v3d_flush_l3(v3d);
        v3d_invalidate_l2c(v3d, 0);
        v3d_flush_l2t(v3d, 0);
        v3d_invalidate_slices(v3d, 0);
}

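/* One-time GEM/GPU state setup at bind time. For scale: the 4MB page
 * table below holds 1M four-byte entries, each mapping a 4KB page, so
 * the MMU covers a 4GB GPU virtual address space.
 */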
int
v3d_gem_init(struct drm_device *dev)
{
        struct v3d_dev *v3d = to_v3d_dev(dev);
        u32 pt_size = 4096 * 1024;
        int ret, i;

        for (i = 0; i < V3D_MAX_QUEUES; i++) {
                v3d->queue[i].fence_context = dma_fence_context_alloc(1);
                v3d->queue[i].start_ns = 0;
                v3d->queue[i].enabled_ns = 0;
                v3d->queue[i].jobs_sent = 0;
        }

        spin_lock_init(&v3d->mm_lock);
        spin_lock_init(&v3d->job_lock);
        ret = drmm_mutex_init(dev, &v3d->bo_lock);
        if (ret)
                return ret;
        ret = drmm_mutex_init(dev, &v3d->reset_lock);
        if (ret)
                return ret;
        ret = drmm_mutex_init(dev, &v3d->sched_lock);
        if (ret)
                return ret;
        ret = drmm_mutex_init(dev, &v3d->cache_clean_lock);
        if (ret)
                return ret;

        /* Note: We don't allocate address 0. Various bits of HW
         * treat 0 as special, such as the occlusion query counters
         * where 0 means "disabled".
         */
        drm_mm_init(&v3d->mm, 1, pt_size / sizeof(u32) - 1);

        v3d->pt = dma_alloc_wc(v3d->drm.dev, pt_size,
                               &v3d->pt_paddr,
                               GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO);
        if (!v3d->pt) {
                drm_mm_takedown(&v3d->mm);
                dev_err(v3d->drm.dev,
                        "Failed to allocate page tables. Please ensure you have DMA enabled.\n");
                return -ENOMEM;
        }

        v3d_init_hw_state(v3d);
        v3d_mmu_set_page_table(v3d);

        ret = v3d_sched_init(v3d);
        if (ret) {
                drm_mm_takedown(&v3d->mm);
                dma_free_coherent(v3d->drm.dev, pt_size, (void *)v3d->pt,
                                  v3d->pt_paddr);
                return ret;
        }

        return 0;
}

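/* Tears down what v3d_gem_init() set up: stops the scheduler, checks
 * that no jobs are still in flight, and frees the allocator and page
 * tables.
 */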
void
v3d_gem_destroy(struct drm_device *dev)
{
        struct v3d_dev *v3d = to_v3d_dev(dev);

        v3d_sched_fini(v3d);

        /* Waiting for jobs to finish would need to be done before
         * unregistering V3D.
         */
        WARN_ON(v3d->bin_job);
        WARN_ON(v3d->render_job);
        WARN_ON(v3d->tfu_job);
        WARN_ON(v3d->csd_job);

        drm_mm_takedown(&v3d->mm);

        dma_free_coherent(v3d->drm.dev, 4096 * 1024, (void *)v3d->pt,
                          v3d->pt_paddr);
}