Commit | Line | Data |
---|---|---|
57692c94 EA |
1 | // SPDX-License-Identifier: GPL-2.0+ |
2 | /* Copyright (C) 2015-2018 Broadcom */ | |
3 | ||
220989e7 SR |
4 | #include <linux/delay.h> |
5 | #include <linux/mutex.h> | |
6 | #include <linux/spinlock_types.h> | |
7 | #include <linux/workqueue.h> | |
8 | ||
57692c94 EA |
9 | #include <drm/drm_encoder.h> |
10 | #include <drm/drm_gem.h> | |
40609d48 | 11 | #include <drm/drm_gem_shmem_helper.h> |
57692c94 | 12 | #include <drm/gpu_scheduler.h> |
220989e7 | 13 | |
1584f16c | 14 | #include "uapi/drm/v3d_drm.h" |
57692c94 | 15 | |
220989e7 | 16 | struct clk; |
220989e7 SR |
17 | struct platform_device; |
18 | struct reset_control; | |
19 | ||
57692c94 EA |
20 | #define GMP_GRANULARITY (128 * 1024) /* 131072; NOTE(review): presumably a size in bytes (128 KiB) -- confirm the unit and the user of this constant */ |
21 | ||
d223f98f | 22 | #define V3D_MAX_QUEUES (V3D_CACHE_CLEAN + 1) /* Sizes v3d_dev.queue[] and v3d_file_priv.sched_entity[]; NOTE(review): assumes V3D_CACHE_CLEAN is the highest-valued queue id -- confirm against the uapi header */ |
57692c94 EA |
23 | |
24 | struct v3d_queue_state { /* Per-queue state; struct v3d_dev holds V3D_MAX_QUEUES of these in queue[]. */ | |
25 | struct drm_gpu_scheduler sched; | |
26 | ||
27 | u64 fence_context; /* NOTE(review): presumably the dma_fence context used for this queue's fences -- confirm in v3d_fence.c */ | |
28 | u64 emit_seqno; /* NOTE(review): presumably the last fence sequence number emitted on this queue -- confirm */ | |
57692c94 EA |
29 | }; |
30 | ||
26a4dc29 JSR |
31 | /* Performance monitor object. The perfmon lifetime is controlled by userspace |
32 | * using perfmon related ioctls. A perfmon can be attached to a submit_cl | |
33 | * request, and when this is the case, HW perf counters will be activated just | |
34 | * before the submit_cl is submitted to the GPU and disabled when the job is | |
35 | * done. This way, only events related to a specific job will be counted. | |
36 | */ | |
37 | struct v3d_perfmon { | |
38 | /* Tracks the number of users of the perfmon; when this counter reaches | |
39 | * zero the perfmon is destroyed. | |
40 | */ | |
41 | refcount_t refcnt; | |
42 | ||
43 | /* Protects perfmon stop, as it can be invoked from multiple places. */ | |
44 | struct mutex lock; | |
45 | ||
46 | /* Number of counters activated in this perfmon instance | |
47 | * (must not exceed DRM_V3D_MAX_PERF_COUNTERS, the capacity of counters[]). | |
48 | */ | |
49 | u8 ncounters; | |
50 | ||
51 | /* Events counted by the HW perf counters. */ | |
52 | u8 counters[DRM_V3D_MAX_PERF_COUNTERS]; | |
53 | ||
54 | /* Storage for counter values. Counters are incremented by the | |
55 | * HW perf counter values every time the perfmon is attached | |
56 | * to a GPU job. This way, perfmon users don't have to | |
57 | * retrieve the results after each job if they want to track | |
58 | * events covering several submissions. Note that counter | |
59 | * values can't be reset, but you can fake a reset by | |
60 | * destroying the perfmon and creating a new one. | |
61 | */ | |
62 | u64 values[]; /* Flexible array member; NOTE(review): presumably allocated with one entry per active counter -- confirm in v3d_perfmon.c */ | |
63 | }; | |
64 | ||
57692c94 EA |
65 | struct v3d_dev { /* Driver-private device state; embeds the drm_device that to_v3d_dev() converts from. */ |
66 | struct drm_device drm; | |
67 | ||
68 | /* Short representation (e.g. 33, 41) of the V3D tech version | |
69 | * and revision. | |
70 | */ | |
71 | int ver; | |
eea9b97b | 72 | bool single_irq_line; /* NOTE(review): presumably set when all interrupts arrive on one IRQ line -- confirm in v3d_irq.c */ |
57692c94 | 73 | |
57692c94 EA |
74 | void __iomem *hub_regs; /* Register base used by V3D_READ()/V3D_WRITE(). */ |
75 | void __iomem *core_regs[3]; /* Per-core register bases used by V3D_CORE_READ()/V3D_CORE_WRITE(); room for three cores. */ | |
76 | void __iomem *bridge_regs; /* Register base used by V3D_BRIDGE_READ()/V3D_BRIDGE_WRITE(). */ | |
77 | void __iomem *gca_regs; /* Register base used by V3D_GCA_READ()/V3D_GCA_WRITE(). */ | |
78 | struct clk *clk; | |
eea9b97b | 79 | struct reset_control *reset; |
57692c94 EA |
80 | |
81 | /* Virtual and DMA addresses of the single shared page table. */ | |
82 | volatile u32 *pt; | |
83 | dma_addr_t pt_paddr; | |
84 | ||
85 | /* Virtual and DMA addresses of the MMU's scratch page. When | |
86 | * a read or write is invalid in the MMU, it will be | |
87 | * redirected here. | |
88 | */ | |
89 | void *mmu_scratch; | |
90 | dma_addr_t mmu_scratch_paddr; | |
38c2c791 EA |
91 | /* Number of virtual address bits from V3D to the MMU. */ |
92 | int va_width; | |
57692c94 EA |
93 | |
94 | /* Number of V3D cores. */ | |
95 | u32 cores; | |
96 | ||
97 | /* Allocator managing the address space. All units are in | |
98 | * number of pages. | |
99 | */ | |
100 | struct drm_mm mm; | |
101 | spinlock_t mm_lock; /* NOTE(review): presumably protects mm -- confirm in v3d_bo.c */ | |
102 | ||
103 | struct work_struct overflow_mem_work; /* NOTE(review): presumably the deferred work that provides overflow memory (see job_lock below) -- confirm in v3d_irq.c */ | |
104 | ||
a783a09e EA |
105 | struct v3d_bin_job *bin_job; /* NOTE(review): these four pointers presumably track the job currently running on each queue -- confirm in v3d_sched.c */ |
106 | struct v3d_render_job *render_job; | |
1584f16c | 107 | struct v3d_tfu_job *tfu_job; |
d223f98f | 108 | struct v3d_csd_job *csd_job; |
57692c94 EA |
109 | |
110 | struct v3d_queue_state queue[V3D_MAX_QUEUES]; /* One scheduler/fence state per queue. */ | |
111 | ||
112 | /* Spinlock used to synchronize the overflow memory | |
113 | * management against bin job submission. | |
114 | */ | |
115 | spinlock_t job_lock; | |
116 | ||
26a4dc29 JSR |
117 | /* Used to track the active perfmon, if any (NULL otherwise). */ |
118 | struct v3d_perfmon *active_perfmon; | |
119 | ||
57692c94 EA |
120 | /* Protects bo_stats. */ |
121 | struct mutex bo_lock; | |
122 | ||
123 | /* Lock taken when resetting the GPU, to keep multiple | |
124 | * processes from trying to park the scheduler threads and | |
125 | * reset at once. | |
126 | */ | |
127 | struct mutex reset_lock; | |
128 | ||
7122b68b EA |
129 | /* Lock taken when creating and pushing the GPU scheduler |
130 | * jobs, to keep the sched-fence seqnos in order. | |
131 | */ | |
132 | struct mutex sched_lock; | |
133 | ||
d223f98f EA |
134 | /* Lock taken during a cache clean and when initiating an L2 |
135 | * flush, to keep L2 flushes from interfering with the | |
136 | * synchronous L2 cleans. | |
137 | */ | |
138 | struct mutex cache_clean_lock; | |
139 | ||
57692c94 EA |
140 | struct { /* Buffer-object accounting; guarded by bo_lock above. */ |
141 | u32 num_allocated; | |
142 | u32 pages_allocated; | |
143 | } bo_stats; | |
144 | }; | |
145 | ||
146 | static inline struct v3d_dev * | |
147 | to_v3d_dev(struct drm_device *dev) | |
148 | { | |
af25c16b | 149 | return container_of(dev, struct v3d_dev, drm); |
57692c94 EA |
150 | } |
151 | ||
d223f98f EA |
152 | static inline bool |
153 | v3d_has_csd(struct v3d_dev *v3d) | |
154 | { | |
155 | return v3d->ver >= 41; | |
156 | } | |
157 | ||
0df3ac76 DV |
/* Platform device behind a v3d_dev, derived from its embedded drm.dev. */
#define v3d_to_pdev(v3d_ptr) \
	to_platform_device((v3d_ptr)->drm.dev)
159 | ||
57692c94 EA |
160 | /* The per-fd struct: device back-pointer, perfmon ID table and one scheduler entity per queue. */ |
161 | struct v3d_file_priv { | |
162 | struct v3d_dev *v3d; | |
163 | ||
26a4dc29 JSR |
164 | struct { /* Perfmons owned by this fd; v3d_perfmon_find() takes an integer id. */ |
165 | struct idr idr; | |
166 | struct mutex lock; /* NOTE(review): presumably protects idr -- confirm in v3d_perfmon.c */ | |
167 | } perfmon; | |
168 | ||
57692c94 EA |
169 | struct drm_sched_entity sched_entity[V3D_MAX_QUEUES]; |
170 | }; | |
171 | ||
57692c94 | 172 | struct v3d_bo { /* Driver buffer object: a GEM shmem object plus a drm_mm node and a list link. */ |
40609d48 | 173 | struct drm_gem_shmem_object base; |
57692c94 EA |
174 | |
175 | struct drm_mm_node node; /* NOTE(review): presumably this BO's range in v3d_dev.mm -- confirm in v3d_bo.c */ | |
176 | ||
57692c94 | 177 | /* List entry for the BO's position in |
a783a09e | 178 | * v3d_render_job->unref_list. |
57692c94 EA |
179 | */ |
180 | struct list_head unref_head; | |
57692c94 EA |
181 | }; |
182 | ||
183 | static inline struct v3d_bo * | |
184 | to_v3d_bo(struct drm_gem_object *bo) | |
185 | { | |
186 | return (struct v3d_bo *)bo; | |
187 | } | |
188 | ||
189 | struct v3d_fence { /* Driver fence: a dma_fence plus the device, seqno and queue it belongs to. */ | |
190 | struct dma_fence base; | |
191 | struct drm_device *dev; | |
192 | /* v3d seqno for signaled() test */ | |
193 | u64 seqno; | |
194 | enum v3d_queue queue; /* Same type as the queue argument of v3d_fence_create(). */ | |
195 | }; | |
196 | ||
197 | static inline struct v3d_fence * | |
198 | to_v3d_fence(struct dma_fence *fence) | |
199 | { | |
200 | return (struct v3d_fence *)fence; | |
201 | } | |
202 | ||
/*
 * MMIO register accessors.
 *
 * Every macro expects a variable named "v3d" (pointer to struct v3d_dev) to
 * be in scope at the point of use; it selects the register block through
 * v3d->hub_regs, v3d->bridge_regs, v3d->gca_regs or v3d->core_regs[core].
 *
 * All macro arguments are parenthesised so that expression arguments
 * (e.g. "cond ? REG_A : REG_B" or "BASE | n") are added to the base address
 * as a whole instead of being re-associated by operator precedence.
 */
#define V3D_READ(offset) readl(v3d->hub_regs + (offset))
#define V3D_WRITE(offset, val) writel((val), v3d->hub_regs + (offset))

#define V3D_BRIDGE_READ(offset) readl(v3d->bridge_regs + (offset))
#define V3D_BRIDGE_WRITE(offset, val) writel((val), v3d->bridge_regs + (offset))

#define V3D_GCA_READ(offset) readl(v3d->gca_regs + (offset))
#define V3D_GCA_WRITE(offset, val) writel((val), v3d->gca_regs + (offset))

#define V3D_CORE_READ(core, offset) readl(v3d->core_regs[(core)] + (offset))
#define V3D_CORE_WRITE(core, offset, val) writel((val), v3d->core_regs[(core)] + (offset))
214 | ||
215 | struct v3d_job { /* Common job state; embedded as "base" in the bin, render, tfu and csd job structs. */ | |
216 | struct drm_sched_job base; | |
217 | ||
a783a09e EA |
218 | struct kref refcount; /* Reference count; the free callback below takes the kref that reached zero. */ |
219 | ||
220 | struct v3d_dev *v3d; | |
221 | ||
222 | /* This is the array of BOs that were looked up at the start | |
223 | * of submission. | |
224 | */ | |
225 | struct drm_gem_object **bo; | |
226 | u32 bo_count; /* NOTE(review): presumably the number of entries in bo[] -- confirm in v3d_gem.c */ | |
57692c94 | 227 | |
57692c94 | 228 | /* v3d fence to be signaled by IRQ handler when the job is complete. */ |
3f0b646e | 229 | struct dma_fence *irq_fence; |
57692c94 | 230 | |
a783a09e EA |
231 | /* Scheduler fence for when the job is considered complete and |
232 | * the BO reservations can be released. | |
233 | */ | |
234 | struct dma_fence *done_fence; | |
235 | ||
26a4dc29 JSR |
236 | /* Pointer to a performance monitor object if the user requested it, |
237 | * NULL otherwise. | |
238 | */ | |
239 | struct v3d_perfmon *perfmon; | |
240 | ||
a783a09e EA |
241 | /* Callback for the freeing of the job on refcount going to 0. */ |
242 | void (*free)(struct kref *ref); | |
243 | }; | |
244 | ||
245 | struct v3d_bin_job { /* Bin job: common v3d_job state plus its control-list range and tile memory parameters. */ | |
246 | struct v3d_job base; | |
247 | ||
57692c94 EA |
248 | /* GPU virtual addresses of the start/end of the CL job. */ |
249 | u32 start, end; | |
624bb0c0 EA |
250 | |
251 | u32 timedout_ctca, timedout_ctra; /* NOTE(review): presumably register values sampled at the last timeout check to detect progress -- confirm in v3d_sched.c */ | |
57692c94 | 252 | |
a783a09e EA |
253 | /* Corresponding render job, for attaching our overflow memory. */ |
254 | struct v3d_render_job *render; | |
255 | ||
256 | /* Submitted tile memory allocation start/size, tile state. */ | |
257 | u32 qma, qms, qts; | |
258 | }; | |
57692c94 | 259 | |
a783a09e EA |
260 | struct v3d_render_job { /* Render job: common v3d_job state plus its control-list range and overflow BO list. */ |
261 | struct v3d_job base; | |
57692c94 | 262 | |
a783a09e EA |
263 | /* GPU virtual addresses of the start/end of the CL job. */ |
264 | u32 start, end; | |
57692c94 | 265 | |
a783a09e | 266 | u32 timedout_ctca, timedout_ctra; /* NOTE(review): presumably register values sampled at the last timeout check to detect progress -- confirm in v3d_sched.c */ |
57692c94 EA |
267 | |
268 | /* List of overflow BOs used in the job that need to be | |
269 | * released once the job is complete. Entries are linked | |
270 | * through v3d_bo.unref_head. | |
271 | */ | |
271 | struct list_head unref_list; | |
57692c94 EA |
272 | }; |
273 | ||
1584f16c | 274 | struct v3d_tfu_job { /* TFU job: common v3d_job state plus a by-value copy of the submit arguments. */ |
a783a09e | 275 | struct v3d_job base; |
1584f16c EA |
276 | |
277 | struct drm_v3d_submit_tfu args; /* Stored by value, so the job keeps its own copy. */ | |
1584f16c EA |
278 | }; |
279 | ||
d223f98f EA |
280 | struct v3d_csd_job { /* CSD job: common v3d_job state plus a by-value copy of the submit arguments. */ |
281 | struct v3d_job base; | |
282 | ||
283 | u32 timedout_batches; /* NOTE(review): presumably a batch counter sampled at the last timeout check to detect progress -- confirm in v3d_sched.c */ | |
284 | ||
285 | struct drm_v3d_submit_csd args; /* Stored by value, so the job keeps its own copy. */ | |
286 | }; | |
287 | ||
e4165ae8 MW |
288 | struct v3d_submit_outsync { /* One output sync object; v3d_submit_ext.out_syncs points at these. */ |
289 | struct drm_syncobj *syncobj; | |
290 | }; | |
291 | ||
292 | struct v3d_submit_ext { /* Extra submit parameters: flags, wait stage, and input/output sync lists. */ | |
293 | u32 flags; | |
294 | u32 wait_stage; | |
295 | ||
296 | u32 in_sync_count; | |
297 | u64 in_syncs; /* NOTE(review): stored as a u64, presumably a userspace address of in_sync_count entries -- confirm in v3d_gem.c */ | |
298 | ||
299 | u32 out_sync_count; | |
300 | struct v3d_submit_outsync *out_syncs; /* NOTE(review): presumably an array of out_sync_count entries -- confirm in v3d_gem.c */ | |
301 | }; | |
302 | ||
57692c94 | 303 | /** |
9daee614 | 304 | * __wait_for - magic wait macro |
57692c94 | 305 | * |
9daee614 JH |
306 | * Macro to help avoid open coding check/wait/timeout patterns. Note that it's |
307 | * important that we check the condition again after having timed out, since the | |
308 | * timeout could be due to preemption or similar and we've never had a chance to | |
309 | * check the condition before the timeout. | |
 | * | |
 | * OP is a statement executed before every evaluation of COND (it may be empty). | |
 | * COND is re-evaluated on every iteration, so it must be safe to evaluate | |
 | * repeatedly. US is the timeout in microseconds. The sleep between checks starts | |
 | * at Wmin microseconds and is doubled after each sleep while it is below Wmax. | |
 | * | |
 | * Evaluates to 0 as soon as COND is true, or to -ETIMEDOUT when COND is still | |
 | * false on a check made after the timeout has expired. The macro calls | |
 | * might_sleep() and usleep_range(), so it may only be used where sleeping is | |
 | * allowed. | |
57692c94 | 310 | */ |
9daee614 JH |
311 | #define __wait_for(OP, COND, US, Wmin, Wmax) ({ \ |
312 | const ktime_t end__ = ktime_add_ns(ktime_get_raw(), 1000ll * (US)); \ | |
313 | long wait__ = (Wmin); /* recommended min for usleep is 10 us */ \ | |
314 | int ret__; \ | |
315 | might_sleep(); \ | |
316 | for (;;) { \ | |
317 | const bool expired__ = ktime_after(ktime_get_raw(), end__); \ | |
318 | OP; \ | |
319 | /* expired__ was sampled first, so COND is always checked once more after the timeout */ \ | |
320 | barrier(); \ | |
321 | if (COND) { \ | |
322 | ret__ = 0; \ | |
57692c94 EA |
323 | break; \ |
324 | } \ | |
9daee614 JH |
325 | if (expired__) { \ |
326 | ret__ = -ETIMEDOUT; \ | |
327 | break; \ | |
328 | } \ | |
329 | usleep_range(wait__, wait__ * 2); \ | |
330 | if (wait__ < (Wmax)) \ | |
331 | wait__ <<= 1; \ | |
57692c94 EA |
332 | } \ |
333 | ret__; \ | |
334 | }) | |
335 | ||
9daee614 JH |
/*
 * _wait_for - __wait_for() without a per-iteration operation.
 *
 * COND is polled for up to US microseconds; Wmin and Wmax are handed to
 * __wait_for() unchanged as the sleep back-off bounds.
 */
#define _wait_for(COND, US, Wmin, Wmax) \
	__wait_for(, (COND), (US), (Wmin), (Wmax))

/*
 * wait_for - poll COND for up to MS milliseconds.
 *
 * The timeout is converted to microseconds and the back-off bounds passed
 * down are 10 and 1000.
 */
#define wait_for(COND, MS) \
	_wait_for((COND), (MS) * 1000, 10, 1000)
339 | ||
57692c94 EA |
340 | static inline unsigned long nsecs_to_jiffies_timeout(const u64 n) |
341 | { | |
342 | /* nsecs_to_jiffies64() does not guard against overflow */ | |
343 | if (NSEC_PER_SEC % HZ && | |
344 | div_u64(n, NSEC_PER_SEC) >= MAX_JIFFY_OFFSET / HZ) | |
345 | return MAX_JIFFY_OFFSET; | |
346 | ||
347 | return min_t(u64, MAX_JIFFY_OFFSET, nsecs_to_jiffies64(n) + 1); | |
348 | } | |
349 | ||
350 | /* v3d_bo.c */ | |
40609d48 | 351 | struct drm_gem_object *v3d_create_object(struct drm_device *dev, size_t size); |
57692c94 EA |
352 | void v3d_free_object(struct drm_gem_object *gem_obj); |
353 | struct v3d_bo *v3d_bo_create(struct drm_device *dev, struct drm_file *file_priv, | |
354 | size_t size); | |
355 | int v3d_create_bo_ioctl(struct drm_device *dev, void *data, | |
356 | struct drm_file *file_priv); | |
357 | int v3d_mmap_bo_ioctl(struct drm_device *dev, void *data, | |
358 | struct drm_file *file_priv); | |
359 | int v3d_get_bo_offset_ioctl(struct drm_device *dev, void *data, | |
360 | struct drm_file *file_priv); | |
57692c94 EA |
361 | struct drm_gem_object *v3d_prime_import_sg_table(struct drm_device *dev, |
362 | struct dma_buf_attachment *attach, | |
363 | struct sg_table *sgt); | |
364 | ||
365 | /* v3d_debugfs.c */ | |
7ce84471 | 366 | void v3d_debugfs_init(struct drm_minor *minor); |
57692c94 EA |
367 | |
368 | /* v3d_fence.c */ | |
369 | extern const struct dma_fence_ops v3d_fence_ops; | |
370 | struct dma_fence *v3d_fence_create(struct v3d_dev *v3d, enum v3d_queue queue); | |
371 | ||
372 | /* v3d_gem.c */ | |
373 | int v3d_gem_init(struct drm_device *dev); | |
374 | void v3d_gem_destroy(struct drm_device *dev); | |
375 | int v3d_submit_cl_ioctl(struct drm_device *dev, void *data, | |
376 | struct drm_file *file_priv); | |
1584f16c EA |
377 | int v3d_submit_tfu_ioctl(struct drm_device *dev, void *data, |
378 | struct drm_file *file_priv); | |
d223f98f EA |
379 | int v3d_submit_csd_ioctl(struct drm_device *dev, void *data, |
380 | struct drm_file *file_priv); | |
57692c94 EA |
381 | int v3d_wait_bo_ioctl(struct drm_device *dev, void *data, |
382 | struct drm_file *file_priv); | |
916044fa | 383 | void v3d_job_cleanup(struct v3d_job *job); |
a783a09e | 384 | void v3d_job_put(struct v3d_job *job); |
57692c94 EA |
385 | void v3d_reset(struct v3d_dev *v3d); |
386 | void v3d_invalidate_caches(struct v3d_dev *v3d); | |
d223f98f | 387 | void v3d_clean_caches(struct v3d_dev *v3d); |
57692c94 EA |
388 | |
389 | /* v3d_irq.c */ | |
fc227715 | 390 | int v3d_irq_init(struct v3d_dev *v3d); |
57692c94 EA |
391 | void v3d_irq_enable(struct v3d_dev *v3d); |
392 | void v3d_irq_disable(struct v3d_dev *v3d); | |
393 | void v3d_irq_reset(struct v3d_dev *v3d); | |
394 | ||
395 | /* v3d_mmu.c */ | |
396 | int v3d_mmu_get_offset(struct drm_file *file_priv, struct v3d_bo *bo, | |
397 | u32 *offset); | |
398 | int v3d_mmu_set_page_table(struct v3d_dev *v3d); | |
399 | void v3d_mmu_insert_ptes(struct v3d_bo *bo); | |
400 | void v3d_mmu_remove_ptes(struct v3d_bo *bo); | |
401 | ||
402 | /* v3d_sched.c */ | |
403 | int v3d_sched_init(struct v3d_dev *v3d); | |
404 | void v3d_sched_fini(struct v3d_dev *v3d); | |
26a4dc29 JSR |
405 | |
406 | /* v3d_perfmon.c */ | |
407 | void v3d_perfmon_get(struct v3d_perfmon *perfmon); | |
408 | void v3d_perfmon_put(struct v3d_perfmon *perfmon); | |
409 | void v3d_perfmon_start(struct v3d_dev *v3d, struct v3d_perfmon *perfmon); | |
410 | void v3d_perfmon_stop(struct v3d_dev *v3d, struct v3d_perfmon *perfmon, | |
411 | bool capture); | |
412 | struct v3d_perfmon *v3d_perfmon_find(struct v3d_file_priv *v3d_priv, int id); | |
413 | void v3d_perfmon_open_file(struct v3d_file_priv *v3d_priv); | |
414 | void v3d_perfmon_close_file(struct v3d_file_priv *v3d_priv); | |
415 | int v3d_perfmon_create_ioctl(struct drm_device *dev, void *data, | |
416 | struct drm_file *file_priv); | |
417 | int v3d_perfmon_destroy_ioctl(struct drm_device *dev, void *data, | |
418 | struct drm_file *file_priv); | |
419 | int v3d_perfmon_get_values_ioctl(struct drm_device *dev, void *data, | |
420 | struct drm_file *file_priv); |