Commit | Line | Data |
---|---|---|
57692c94 EA |
1 | // SPDX-License-Identifier: GPL-2.0+ |
2 | /* Copyright (C) 2014-2018 Broadcom */ | |
3 | ||
220989e7 SR |
4 | #include <linux/device.h> |
5 | #include <linux/dma-mapping.h> | |
6 | #include <linux/io.h> | |
57692c94 EA |
7 | #include <linux/module.h> |
8 | #include <linux/platform_device.h> | |
9 | #include <linux/pm_runtime.h> | |
eea9b97b | 10 | #include <linux/reset.h> |
57692c94 | 11 | #include <linux/sched/signal.h> |
220989e7 SR |
12 | #include <linux/uaccess.h> |
13 | ||
14 | #include <drm/drm_syncobj.h> | |
15 | #include <uapi/drm/v3d_drm.h> | |
57692c94 | 16 | |
57692c94 EA |
17 | #include "v3d_drv.h" |
18 | #include "v3d_regs.h" | |
19 | #include "v3d_trace.h" | |
20 | ||
21 | static void | |
22 | v3d_init_core(struct v3d_dev *v3d, int core) | |
23 | { | |
24 | /* Set OVRTMUOUT, which means that the texture sampler uniform | |
25 | * configuration's tmu output type field is used, instead of | |
26 | * using the hardware default behavior based on the texture | |
27 | * type. If you want the default behavior, you can still put | |
28 | * "2" in the indirect texture state's output_type field. | |
29 | */ | |
a7dde1b7 EA |
30 | if (v3d->ver < 40) |
31 | V3D_CORE_WRITE(core, V3D_CTL_MISCCFG, V3D_MISCCFG_OVRTMUOUT); | |
57692c94 EA |
32 | |
33 | /* Whenever we flush the L2T cache, we always want to flush | |
34 | * the whole thing. | |
35 | */ | |
36 | V3D_CORE_WRITE(core, V3D_CTL_L2TFLSTA, 0); | |
37 | V3D_CORE_WRITE(core, V3D_CTL_L2TFLEND, ~0); | |
38 | } | |
39 | ||
/* Sets invariant state for the HW.  Only core 0 is initialized here. */
static void
v3d_init_hw_state(struct v3d_dev *v3d)
{
	const int core = 0;

	v3d_init_core(v3d, core);
}
46 | ||
47 | static void | |
48 | v3d_idle_axi(struct v3d_dev *v3d, int core) | |
49 | { | |
50 | V3D_CORE_WRITE(core, V3D_GMP_CFG, V3D_GMP_CFG_STOP_REQ); | |
51 | ||
52 | if (wait_for((V3D_CORE_READ(core, V3D_GMP_STATUS) & | |
53 | (V3D_GMP_STATUS_RD_COUNT_MASK | | |
54 | V3D_GMP_STATUS_WR_COUNT_MASK | | |
55 | V3D_GMP_STATUS_CFG_BUSY)) == 0, 100)) { | |
56 | DRM_ERROR("Failed to wait for safe GMP shutdown\n"); | |
57 | } | |
58 | } | |
59 | ||
60 | static void | |
61 | v3d_idle_gca(struct v3d_dev *v3d) | |
62 | { | |
63 | if (v3d->ver >= 41) | |
64 | return; | |
65 | ||
66 | V3D_GCA_WRITE(V3D_GCA_SAFE_SHUTDOWN, V3D_GCA_SAFE_SHUTDOWN_EN); | |
67 | ||
68 | if (wait_for((V3D_GCA_READ(V3D_GCA_SAFE_SHUTDOWN_ACK) & | |
69 | V3D_GCA_SAFE_SHUTDOWN_ACK_ACKED) == | |
70 | V3D_GCA_SAFE_SHUTDOWN_ACK_ACKED, 100)) { | |
71 | DRM_ERROR("Failed to wait for safe GCA shutdown\n"); | |
72 | } | |
73 | } | |
74 | ||
75 | static void | |
eea9b97b | 76 | v3d_reset_by_bridge(struct v3d_dev *v3d) |
57692c94 EA |
77 | { |
78 | int version = V3D_BRIDGE_READ(V3D_TOP_GR_BRIDGE_REVISION); | |
79 | ||
80 | if (V3D_GET_FIELD(version, V3D_TOP_GR_BRIDGE_MAJOR) == 2) { | |
81 | V3D_BRIDGE_WRITE(V3D_TOP_GR_BRIDGE_SW_INIT_0, | |
82 | V3D_TOP_GR_BRIDGE_SW_INIT_0_V3D_CLK_108_SW_INIT); | |
83 | V3D_BRIDGE_WRITE(V3D_TOP_GR_BRIDGE_SW_INIT_0, 0); | |
84 | ||
85 | /* GFXH-1383: The SW_INIT may cause a stray write to address 0 | |
86 | * of the unit, so reset it to its power-on value here. | |
87 | */ | |
88 | V3D_WRITE(V3D_HUB_AXICFG, V3D_HUB_AXICFG_MAX_LEN_MASK); | |
89 | } else { | |
90 | WARN_ON_ONCE(V3D_GET_FIELD(version, | |
91 | V3D_TOP_GR_BRIDGE_MAJOR) != 7); | |
92 | V3D_BRIDGE_WRITE(V3D_TOP_GR_BRIDGE_SW_INIT_1, | |
93 | V3D_TOP_GR_BRIDGE_SW_INIT_1_V3D_CLK_108_SW_INIT); | |
94 | V3D_BRIDGE_WRITE(V3D_TOP_GR_BRIDGE_SW_INIT_1, 0); | |
95 | } | |
eea9b97b EA |
96 | } |
97 | ||
98 | static void | |
99 | v3d_reset_v3d(struct v3d_dev *v3d) | |
100 | { | |
101 | if (v3d->reset) | |
102 | reset_control_reset(v3d->reset); | |
103 | else | |
104 | v3d_reset_by_bridge(v3d); | |
57692c94 EA |
105 | |
106 | v3d_init_hw_state(v3d); | |
107 | } | |
108 | ||
109 | void | |
110 | v3d_reset(struct v3d_dev *v3d) | |
111 | { | |
112 | struct drm_device *dev = &v3d->drm; | |
113 | ||
1ba9d7cb EA |
114 | DRM_DEV_ERROR(dev->dev, "Resetting GPU for hang.\n"); |
115 | DRM_DEV_ERROR(dev->dev, "V3D_ERR_STAT: 0x%08x\n", | |
116 | V3D_CORE_READ(0, V3D_ERR_STAT)); | |
57692c94 EA |
117 | trace_v3d_reset_begin(dev); |
118 | ||
119 | /* XXX: only needed for safe powerdown, not reset. */ | |
120 | if (false) | |
121 | v3d_idle_axi(v3d, 0); | |
122 | ||
123 | v3d_idle_gca(v3d); | |
124 | v3d_reset_v3d(v3d); | |
125 | ||
126 | v3d_mmu_set_page_table(v3d); | |
127 | v3d_irq_reset(v3d); | |
128 | ||
26a4dc29 JSR |
129 | v3d_perfmon_stop(v3d, v3d->active_perfmon, false); |
130 | ||
57692c94 EA |
131 | trace_v3d_reset_end(dev); |
132 | } | |
133 | ||
134 | static void | |
135 | v3d_flush_l3(struct v3d_dev *v3d) | |
136 | { | |
137 | if (v3d->ver < 41) { | |
138 | u32 gca_ctrl = V3D_GCA_READ(V3D_GCA_CACHE_CTRL); | |
139 | ||
140 | V3D_GCA_WRITE(V3D_GCA_CACHE_CTRL, | |
141 | gca_ctrl | V3D_GCA_CACHE_CTRL_FLUSH); | |
142 | ||
143 | if (v3d->ver < 33) { | |
144 | V3D_GCA_WRITE(V3D_GCA_CACHE_CTRL, | |
145 | gca_ctrl & ~V3D_GCA_CACHE_CTRL_FLUSH); | |
146 | } | |
147 | } | |
148 | } | |
149 | ||
7b9d2fe4 EA |
150 | /* Invalidates the (read-only) L2C cache. This was the L2 cache for |
151 | * uniforms and instructions on V3D 3.2. | |
152 | */ | |
57692c94 | 153 | static void |
7b9d2fe4 | 154 | v3d_invalidate_l2c(struct v3d_dev *v3d, int core) |
57692c94 | 155 | { |
7b9d2fe4 EA |
156 | if (v3d->ver > 32) |
157 | return; | |
158 | ||
57692c94 EA |
159 | V3D_CORE_WRITE(core, V3D_CTL_L2CACTL, |
160 | V3D_L2CACTL_L2CCLR | | |
161 | V3D_L2CACTL_L2CENA); | |
162 | } | |
163 | ||
57692c94 EA |
164 | /* Invalidates texture L2 cachelines */ |
165 | static void | |
166 | v3d_flush_l2t(struct v3d_dev *v3d, int core) | |
167 | { | |
51c1b6f9 EA |
168 | /* While there is a busy bit (V3D_L2TCACTL_L2TFLS), we don't |
169 | * need to wait for completion before dispatching the job -- | |
170 | * L2T accesses will be stalled until the flush has completed. | |
d223f98f EA |
171 | * However, we do need to make sure we don't try to trigger a |
172 | * new flush while the L2_CLEAN queue is trying to | |
173 | * synchronously clean after a job. | |
51c1b6f9 | 174 | */ |
d223f98f | 175 | mutex_lock(&v3d->cache_clean_lock); |
57692c94 EA |
176 | V3D_CORE_WRITE(core, V3D_CTL_L2TCACTL, |
177 | V3D_L2TCACTL_L2TFLS | | |
178 | V3D_SET_FIELD(V3D_L2TCACTL_FLM_FLUSH, V3D_L2TCACTL_FLM)); | |
d223f98f EA |
179 | mutex_unlock(&v3d->cache_clean_lock); |
180 | } | |
181 | ||
182 | /* Cleans texture L1 and L2 cachelines (writing back dirty data). | |
183 | * | |
184 | * For cleaning, which happens from the CACHE_CLEAN queue after CSD has | |
185 | * executed, we need to make sure that the clean is done before | |
186 | * signaling job completion. So, we synchronously wait before | |
187 | * returning, and we make sure that L2 invalidates don't happen in the | |
188 | * meantime to confuse our are-we-done checks. | |
189 | */ | |
190 | void | |
191 | v3d_clean_caches(struct v3d_dev *v3d) | |
192 | { | |
193 | struct drm_device *dev = &v3d->drm; | |
194 | int core = 0; | |
195 | ||
196 | trace_v3d_cache_clean_begin(dev); | |
197 | ||
198 | V3D_CORE_WRITE(core, V3D_CTL_L2TCACTL, V3D_L2TCACTL_TMUWCF); | |
199 | if (wait_for(!(V3D_CORE_READ(core, V3D_CTL_L2TCACTL) & | |
e4f86819 ITQ |
200 | V3D_L2TCACTL_TMUWCF), 100)) { |
201 | DRM_ERROR("Timeout waiting for TMU write combiner flush\n"); | |
d223f98f EA |
202 | } |
203 | ||
204 | mutex_lock(&v3d->cache_clean_lock); | |
205 | V3D_CORE_WRITE(core, V3D_CTL_L2TCACTL, | |
206 | V3D_L2TCACTL_L2TFLS | | |
207 | V3D_SET_FIELD(V3D_L2TCACTL_FLM_CLEAN, V3D_L2TCACTL_FLM)); | |
208 | ||
209 | if (wait_for(!(V3D_CORE_READ(core, V3D_CTL_L2TCACTL) & | |
210 | V3D_L2TCACTL_L2TFLS), 100)) { | |
211 | DRM_ERROR("Timeout waiting for L2T clean\n"); | |
212 | } | |
213 | ||
214 | mutex_unlock(&v3d->cache_clean_lock); | |
215 | ||
216 | trace_v3d_cache_clean_end(dev); | |
57692c94 EA |
217 | } |
218 | ||
219 | /* Invalidates the slice caches. These are read-only caches. */ | |
220 | static void | |
221 | v3d_invalidate_slices(struct v3d_dev *v3d, int core) | |
222 | { | |
223 | V3D_CORE_WRITE(core, V3D_CTL_SLCACTL, | |
224 | V3D_SET_FIELD(0xf, V3D_SLCACTL_TVCCS) | | |
225 | V3D_SET_FIELD(0xf, V3D_SLCACTL_TDCCS) | | |
226 | V3D_SET_FIELD(0xf, V3D_SLCACTL_UCC) | | |
227 | V3D_SET_FIELD(0xf, V3D_SLCACTL_ICC)); | |
228 | } | |
229 | ||
57692c94 EA |
/* Invalidates every cache level for core 0. */
void
v3d_invalidate_caches(struct v3d_dev *v3d)
{
	const int core = 0;

	/* Work from the outermost cache inwards.  If another CL's
	 * concurrent use of nearby memory pulls an invalidated
	 * cacheline back in, the inner cache is still invalidated
	 * afterwards and cannot be left holding stale data.
	 */
	v3d_flush_l3(v3d);
	v3d_invalidate_l2c(v3d, core);
	v3d_flush_l2t(v3d, core);
	v3d_invalidate_slices(v3d, core);
}
243 | ||
57692c94 EA |
244 | /* Takes the reservation lock on all the BOs being referenced, so that |
245 | * at queue submit time we can update the reservations. | |
246 | * | |
247 | * We don't lock the RCL the tile alloc/state BOs, or overflow memory | |
248 | * (all of which are on exec->unref_list). They're entirely private | |
249 | * to v3d, so we don't attach dma-buf fences to them. | |
250 | */ | |
251 | static int | |
dffa9b7a | 252 | v3d_lock_bo_reservations(struct v3d_job *job, |
57692c94 EA |
253 | struct ww_acquire_ctx *acquire_ctx) |
254 | { | |
57692c94 | 255 | int i, ret; |
57692c94 | 256 | |
dffa9b7a | 257 | ret = drm_gem_lock_reservations(job->bo, job->bo_count, acquire_ctx); |
c2b3e61a EA |
258 | if (ret) |
259 | return ret; | |
57692c94 | 260 | |
dffa9b7a | 261 | for (i = 0; i < job->bo_count; i++) { |
da3208e8 DV |
262 | ret = drm_sched_job_add_implicit_dependencies(&job->base, |
263 | job->bo[i], true); | |
dffa9b7a EA |
264 | if (ret) { |
265 | drm_gem_unlock_reservations(job->bo, job->bo_count, | |
266 | acquire_ctx); | |
267 | return ret; | |
268 | } | |
269 | } | |
270 | ||
57692c94 EA |
271 | return 0; |
272 | } | |
273 | ||
274 | /** | |
a783a09e | 275 | * v3d_lookup_bos() - Sets up job->bo[] with the GEM objects |
57692c94 EA |
276 | * referenced by the job. |
277 | * @dev: DRM device | |
278 | * @file_priv: DRM file for this fd | |
a783a09e | 279 | * @job: V3D job being set up |
e226878e LJ |
280 | * @bo_handles: GEM handles |
281 | * @bo_count: Number of GEM handles passed in | |
57692c94 EA |
282 | * |
283 | * The command validator needs to reference BOs by their index within | |
284 | * the submitted job's BO list. This does the validation of the job's | |
285 | * BO list and reference counting for the lifetime of the job. | |
286 | * | |
287 | * Note that this function doesn't need to unreference the BOs on | |
288 | * failure, because that will happen at v3d_exec_cleanup() time. | |
289 | */ | |
290 | static int | |
a783a09e EA |
291 | v3d_lookup_bos(struct drm_device *dev, |
292 | struct drm_file *file_priv, | |
293 | struct v3d_job *job, | |
294 | u64 bo_handles, | |
295 | u32 bo_count) | |
57692c94 EA |
296 | { |
297 | u32 *handles; | |
298 | int ret = 0; | |
299 | int i; | |
300 | ||
a783a09e | 301 | job->bo_count = bo_count; |
57692c94 | 302 | |
a783a09e | 303 | if (!job->bo_count) { |
57692c94 EA |
304 | /* See comment on bo_index for why we have to check |
305 | * this. | |
306 | */ | |
307 | DRM_DEBUG("Rendering requires BOs\n"); | |
308 | return -EINVAL; | |
309 | } | |
310 | ||
a783a09e EA |
311 | job->bo = kvmalloc_array(job->bo_count, |
312 | sizeof(struct drm_gem_cma_object *), | |
313 | GFP_KERNEL | __GFP_ZERO); | |
314 | if (!job->bo) { | |
57692c94 EA |
315 | DRM_DEBUG("Failed to allocate validated BO pointers\n"); |
316 | return -ENOMEM; | |
317 | } | |
318 | ||
a783a09e | 319 | handles = kvmalloc_array(job->bo_count, sizeof(u32), GFP_KERNEL); |
57692c94 EA |
320 | if (!handles) { |
321 | ret = -ENOMEM; | |
322 | DRM_DEBUG("Failed to allocate incoming GEM handles\n"); | |
323 | goto fail; | |
324 | } | |
325 | ||
326 | if (copy_from_user(handles, | |
a783a09e EA |
327 | (void __user *)(uintptr_t)bo_handles, |
328 | job->bo_count * sizeof(u32))) { | |
57692c94 EA |
329 | ret = -EFAULT; |
330 | DRM_DEBUG("Failed to copy in GEM handles\n"); | |
331 | goto fail; | |
332 | } | |
333 | ||
334 | spin_lock(&file_priv->table_lock); | |
a783a09e | 335 | for (i = 0; i < job->bo_count; i++) { |
57692c94 EA |
336 | struct drm_gem_object *bo = idr_find(&file_priv->object_idr, |
337 | handles[i]); | |
338 | if (!bo) { | |
339 | DRM_DEBUG("Failed to look up GEM BO %d: %d\n", | |
340 | i, handles[i]); | |
341 | ret = -ENOENT; | |
342 | spin_unlock(&file_priv->table_lock); | |
343 | goto fail; | |
344 | } | |
345 | drm_gem_object_get(bo); | |
a783a09e | 346 | job->bo[i] = bo; |
57692c94 EA |
347 | } |
348 | spin_unlock(&file_priv->table_lock); | |
349 | ||
350 | fail: | |
351 | kvfree(handles); | |
352 | return ret; | |
353 | } | |
354 | ||
355 | static void | |
a783a09e | 356 | v3d_job_free(struct kref *ref) |
57692c94 | 357 | { |
a783a09e EA |
358 | struct v3d_job *job = container_of(ref, struct v3d_job, refcount); |
359 | int i; | |
57692c94 | 360 | |
a783a09e EA |
361 | for (i = 0; i < job->bo_count; i++) { |
362 | if (job->bo[i]) | |
2b86189e | 363 | drm_gem_object_put(job->bo[i]); |
57692c94 | 364 | } |
a783a09e | 365 | kvfree(job->bo); |
57692c94 | 366 | |
a783a09e EA |
367 | dma_fence_put(job->irq_fence); |
368 | dma_fence_put(job->done_fence); | |
57692c94 | 369 | |
bc662528 DV |
370 | pm_runtime_mark_last_busy(job->v3d->drm.dev); |
371 | pm_runtime_put_autosuspend(job->v3d->drm.dev); | |
57692c94 | 372 | |
26a4dc29 JSR |
373 | if (job->perfmon) |
374 | v3d_perfmon_put(job->perfmon); | |
375 | ||
a783a09e | 376 | kfree(job); |
57692c94 EA |
377 | } |
378 | ||
1584f16c | 379 | static void |
a783a09e | 380 | v3d_render_job_free(struct kref *ref) |
1584f16c | 381 | { |
a783a09e EA |
382 | struct v3d_render_job *job = container_of(ref, struct v3d_render_job, |
383 | base.refcount); | |
384 | struct v3d_bo *bo, *save; | |
1584f16c | 385 | |
a783a09e | 386 | list_for_each_entry_safe(bo, save, &job->unref_list, unref_head) { |
2b86189e | 387 | drm_gem_object_put(&bo->base.base); |
1584f16c EA |
388 | } |
389 | ||
a783a09e | 390 | v3d_job_free(ref); |
1584f16c EA |
391 | } |
392 | ||
916044fa DV |
393 | void v3d_job_cleanup(struct v3d_job *job) |
394 | { | |
07c2a416 MW |
395 | if (!job) |
396 | return; | |
397 | ||
916044fa DV |
398 | drm_sched_job_cleanup(&job->base); |
399 | v3d_job_put(job); | |
400 | } | |
401 | ||
a783a09e | 402 | void v3d_job_put(struct v3d_job *job) |
1584f16c | 403 | { |
a783a09e | 404 | kref_put(&job->refcount, job->free); |
1584f16c EA |
405 | } |
406 | ||
57692c94 EA |
407 | int |
408 | v3d_wait_bo_ioctl(struct drm_device *dev, void *data, | |
409 | struct drm_file *file_priv) | |
410 | { | |
411 | int ret; | |
412 | struct drm_v3d_wait_bo *args = data; | |
57692c94 EA |
413 | ktime_t start = ktime_get(); |
414 | u64 delta_ns; | |
415 | unsigned long timeout_jiffies = | |
416 | nsecs_to_jiffies_timeout(args->timeout_ns); | |
417 | ||
418 | if (args->pad != 0) | |
419 | return -EINVAL; | |
420 | ||
52791eee | 421 | ret = drm_gem_dma_resv_wait(file_priv, args->handle, |
223583dd | 422 | true, timeout_jiffies); |
57692c94 EA |
423 | |
424 | /* Decrement the user's timeout, in case we got interrupted | |
425 | * such that the ioctl will be restarted. | |
426 | */ | |
427 | delta_ns = ktime_to_ns(ktime_sub(ktime_get(), start)); | |
428 | if (delta_ns < args->timeout_ns) | |
429 | args->timeout_ns -= delta_ns; | |
430 | else | |
431 | args->timeout_ns = 0; | |
432 | ||
433 | /* Asked to wait beyond the jiffie/scheduler precision? */ | |
434 | if (ret == -ETIME && args->timeout_ns) | |
435 | ret = -EAGAIN; | |
436 | ||
57692c94 EA |
437 | return ret; |
438 | } | |
439 | ||
223583dd MW |
440 | static int |
441 | v3d_job_add_deps(struct drm_file *file_priv, struct v3d_job *job, | |
442 | u32 in_sync, u32 point) | |
443 | { | |
444 | struct dma_fence *in_fence = NULL; | |
445 | int ret; | |
446 | ||
447 | ret = drm_syncobj_find_fence(file_priv, in_sync, point, 0, &in_fence); | |
448 | if (ret == -EINVAL) | |
449 | return ret; | |
450 | ||
451 | return drm_sched_job_add_dependency(&job->base, in_fence); | |
452 | } | |
453 | ||
a783a09e EA |
454 | static int |
455 | v3d_job_init(struct v3d_dev *v3d, struct drm_file *file_priv, | |
07c2a416 | 456 | void **container, size_t size, void (*free)(struct kref *ref), |
e4165ae8 | 457 | u32 in_sync, struct v3d_submit_ext *se, enum v3d_queue queue) |
a783a09e | 458 | { |
916044fa | 459 | struct v3d_file_priv *v3d_priv = file_priv->driver_priv; |
07c2a416 | 460 | struct v3d_job *job; |
e4165ae8 MW |
461 | bool has_multisync = se && (se->flags & DRM_V3D_EXT_ID_MULTI_SYNC); |
462 | int ret, i; | |
a783a09e | 463 | |
07c2a416 MW |
464 | *container = kcalloc(1, size, GFP_KERNEL); |
465 | if (!*container) { | |
466 | DRM_ERROR("Cannot allocate memory for v3d job."); | |
467 | return -ENOMEM; | |
468 | } | |
469 | ||
470 | job = *container; | |
a783a09e EA |
471 | job->v3d = v3d; |
472 | job->free = free; | |
473 | ||
bc662528 | 474 | ret = pm_runtime_get_sync(v3d->drm.dev); |
a783a09e | 475 | if (ret < 0) |
07c2a416 | 476 | goto fail; |
a783a09e | 477 | |
916044fa DV |
478 | ret = drm_sched_job_init(&job->base, &v3d_priv->sched_entity[queue], |
479 | v3d_priv); | |
480 | if (ret) | |
07c2a416 | 481 | goto fail_job; |
dffa9b7a | 482 | |
e4165ae8 MW |
483 | if (has_multisync) { |
484 | if (se->in_sync_count && se->wait_stage == queue) { | |
485 | struct drm_v3d_sem __user *handle = u64_to_user_ptr(se->in_syncs); | |
486 | ||
487 | for (i = 0; i < se->in_sync_count; i++) { | |
488 | struct drm_v3d_sem in; | |
489 | ||
ee30840b DC |
490 | if (copy_from_user(&in, handle++, sizeof(in))) { |
491 | ret = -EFAULT; | |
e4165ae8 MW |
492 | DRM_DEBUG("Failed to copy wait dep handle.\n"); |
493 | goto fail_deps; | |
494 | } | |
495 | ret = v3d_job_add_deps(file_priv, job, in.handle, 0); | |
496 | if (ret) | |
497 | goto fail_deps; | |
498 | } | |
499 | } | |
500 | } else { | |
501 | ret = v3d_job_add_deps(file_priv, job, in_sync, 0); | |
502 | if (ret) | |
503 | goto fail_deps; | |
504 | } | |
a783a09e EA |
505 | |
506 | kref_init(&job->refcount); | |
507 | ||
508 | return 0; | |
07c2a416 MW |
509 | |
510 | fail_deps: | |
916044fa | 511 | drm_sched_job_cleanup(&job->base); |
07c2a416 | 512 | fail_job: |
bc662528 | 513 | pm_runtime_put_autosuspend(v3d->drm.dev); |
07c2a416 MW |
514 | fail: |
515 | kfree(*container); | |
516 | *container = NULL; | |
517 | ||
dffa9b7a | 518 | return ret; |
a783a09e EA |
519 | } |
520 | ||
916044fa DV |
521 | static void |
522 | v3d_push_job(struct v3d_job *job) | |
a783a09e | 523 | { |
dbe48d03 DV |
524 | drm_sched_job_arm(&job->base); |
525 | ||
a783a09e EA |
526 | job->done_fence = dma_fence_get(&job->base.s_fence->finished); |
527 | ||
528 | /* put by scheduler job completion */ | |
529 | kref_get(&job->refcount); | |
530 | ||
0e10e9a1 | 531 | drm_sched_entity_push_job(&job->base); |
a783a09e EA |
532 | } |
533 | ||
534 | static void | |
535 | v3d_attach_fences_and_unlock_reservation(struct drm_file *file_priv, | |
536 | struct v3d_job *job, | |
537 | struct ww_acquire_ctx *acquire_ctx, | |
d223f98f | 538 | u32 out_sync, |
e4165ae8 | 539 | struct v3d_submit_ext *se, |
d223f98f | 540 | struct dma_fence *done_fence) |
a783a09e EA |
541 | { |
542 | struct drm_syncobj *sync_out; | |
e4165ae8 | 543 | bool has_multisync = se && (se->flags & DRM_V3D_EXT_ID_MULTI_SYNC); |
a783a09e EA |
544 | int i; |
545 | ||
546 | for (i = 0; i < job->bo_count; i++) { | |
547 | /* XXX: Use shared fences for read-only objects. */ | |
52791eee | 548 | dma_resv_add_excl_fence(job->bo[i]->resv, |
223583dd | 549 | job->done_fence); |
a783a09e EA |
550 | } |
551 | ||
552 | drm_gem_unlock_reservations(job->bo, job->bo_count, acquire_ctx); | |
553 | ||
554 | /* Update the return sync object for the job */ | |
e4165ae8 MW |
555 | /* If it only supports a single signal semaphore*/ |
556 | if (!has_multisync) { | |
557 | sync_out = drm_syncobj_find(file_priv, out_sync); | |
558 | if (sync_out) { | |
559 | drm_syncobj_replace_fence(sync_out, done_fence); | |
560 | drm_syncobj_put(sync_out); | |
561 | } | |
562 | return; | |
563 | } | |
564 | ||
565 | /* If multiple semaphores extension is supported */ | |
566 | if (se->out_sync_count) { | |
567 | for (i = 0; i < se->out_sync_count; i++) { | |
568 | drm_syncobj_replace_fence(se->out_syncs[i].syncobj, | |
569 | done_fence); | |
570 | drm_syncobj_put(se->out_syncs[i].syncobj); | |
571 | } | |
572 | kvfree(se->out_syncs); | |
573 | } | |
574 | } | |
575 | ||
576 | static void | |
577 | v3d_put_multisync_post_deps(struct v3d_submit_ext *se) | |
578 | { | |
579 | unsigned int i; | |
580 | ||
581 | if (!(se && se->out_sync_count)) | |
582 | return; | |
583 | ||
584 | for (i = 0; i < se->out_sync_count; i++) | |
585 | drm_syncobj_put(se->out_syncs[i].syncobj); | |
586 | kvfree(se->out_syncs); | |
587 | } | |
588 | ||
589 | static int | |
590 | v3d_get_multisync_post_deps(struct drm_file *file_priv, | |
591 | struct v3d_submit_ext *se, | |
592 | u32 count, u64 handles) | |
593 | { | |
594 | struct drm_v3d_sem __user *post_deps; | |
595 | int i, ret; | |
596 | ||
597 | if (!count) | |
598 | return 0; | |
599 | ||
600 | se->out_syncs = (struct v3d_submit_outsync *) | |
601 | kvmalloc_array(count, | |
602 | sizeof(struct v3d_submit_outsync), | |
603 | GFP_KERNEL); | |
604 | if (!se->out_syncs) | |
605 | return -ENOMEM; | |
606 | ||
607 | post_deps = u64_to_user_ptr(handles); | |
608 | ||
609 | for (i = 0; i < count; i++) { | |
610 | struct drm_v3d_sem out; | |
611 | ||
ee30840b DC |
612 | if (copy_from_user(&out, post_deps++, sizeof(out))) { |
613 | ret = -EFAULT; | |
e4165ae8 MW |
614 | DRM_DEBUG("Failed to copy post dep handles\n"); |
615 | goto fail; | |
616 | } | |
617 | ||
618 | se->out_syncs[i].syncobj = drm_syncobj_find(file_priv, | |
619 | out.handle); | |
620 | if (!se->out_syncs[i].syncobj) { | |
621 | ret = -EINVAL; | |
622 | goto fail; | |
623 | } | |
a783a09e | 624 | } |
e4165ae8 MW |
625 | se->out_sync_count = count; |
626 | ||
627 | return 0; | |
628 | ||
629 | fail: | |
630 | for (i--; i >= 0; i--) | |
631 | drm_syncobj_put(se->out_syncs[i].syncobj); | |
632 | kvfree(se->out_syncs); | |
633 | ||
634 | return ret; | |
635 | } | |
636 | ||
637 | /* Get data for multiple binary semaphores synchronization. Parse syncobj | |
638 | * to be signaled when job completes (out_sync). | |
639 | */ | |
640 | static int | |
641 | v3d_get_multisync_submit_deps(struct drm_file *file_priv, | |
642 | struct drm_v3d_extension __user *ext, | |
643 | void *data) | |
644 | { | |
645 | struct drm_v3d_multi_sync multisync; | |
646 | struct v3d_submit_ext *se = data; | |
647 | int ret; | |
648 | ||
ee30840b DC |
649 | if (copy_from_user(&multisync, ext, sizeof(multisync))) |
650 | return -EFAULT; | |
e4165ae8 MW |
651 | |
652 | if (multisync.pad) | |
653 | return -EINVAL; | |
654 | ||
655 | ret = v3d_get_multisync_post_deps(file_priv, data, multisync.out_sync_count, | |
656 | multisync.out_syncs); | |
657 | if (ret) | |
658 | return ret; | |
659 | ||
660 | se->in_sync_count = multisync.in_sync_count; | |
661 | se->in_syncs = multisync.in_syncs; | |
662 | se->flags |= DRM_V3D_EXT_ID_MULTI_SYNC; | |
663 | se->wait_stage = multisync.wait_stage; | |
664 | ||
665 | return 0; | |
a783a09e EA |
666 | } |
667 | ||
bb3425ef MW |
668 | /* Whenever userspace sets ioctl extensions, v3d_get_extensions parses data |
669 | * according to the extension id (name). | |
670 | */ | |
671 | static int | |
e4165ae8 MW |
672 | v3d_get_extensions(struct drm_file *file_priv, |
673 | u64 ext_handles, | |
674 | void *data) | |
bb3425ef MW |
675 | { |
676 | struct drm_v3d_extension __user *user_ext; | |
e4165ae8 | 677 | int ret; |
bb3425ef MW |
678 | |
679 | user_ext = u64_to_user_ptr(ext_handles); | |
680 | while (user_ext) { | |
681 | struct drm_v3d_extension ext; | |
682 | ||
683 | if (copy_from_user(&ext, user_ext, sizeof(ext))) { | |
684 | DRM_DEBUG("Failed to copy submit extension\n"); | |
685 | return -EFAULT; | |
686 | } | |
687 | ||
688 | switch (ext.id) { | |
e4165ae8 MW |
689 | case DRM_V3D_EXT_ID_MULTI_SYNC: |
690 | ret = v3d_get_multisync_submit_deps(file_priv, user_ext, data); | |
691 | if (ret) | |
692 | return ret; | |
693 | break; | |
bb3425ef MW |
694 | default: |
695 | DRM_DEBUG_DRIVER("Unknown extension id: %d\n", ext.id); | |
696 | return -EINVAL; | |
697 | } | |
698 | ||
699 | user_ext = u64_to_user_ptr(ext.next); | |
700 | } | |
701 | ||
702 | return 0; | |
703 | } | |
704 | ||
57692c94 EA |
705 | /** |
706 | * v3d_submit_cl_ioctl() - Submits a job (frame) to the V3D. | |
707 | * @dev: DRM device | |
708 | * @data: ioctl argument | |
709 | * @file_priv: DRM file for this fd | |
710 | * | |
711 | * This is the main entrypoint for userspace to submit a 3D frame to | |
712 | * the GPU. Userspace provides the binner command list (if | |
713 | * applicable), and the kernel sets up the render command list to draw | |
714 | * to the framebuffer described in the ioctl, using the command lists | |
715 | * that the 3D engine's binner will produce. | |
716 | */ | |
717 | int | |
718 | v3d_submit_cl_ioctl(struct drm_device *dev, void *data, | |
719 | struct drm_file *file_priv) | |
720 | { | |
721 | struct v3d_dev *v3d = to_v3d_dev(dev); | |
722 | struct v3d_file_priv *v3d_priv = file_priv->driver_priv; | |
723 | struct drm_v3d_submit_cl *args = data; | |
e4165ae8 | 724 | struct v3d_submit_ext se = {0}; |
a783a09e | 725 | struct v3d_bin_job *bin = NULL; |
07c2a416 | 726 | struct v3d_render_job *render = NULL; |
455d56ce ITQ |
727 | struct v3d_job *clean_job = NULL; |
728 | struct v3d_job *last_job; | |
57692c94 | 729 | struct ww_acquire_ctx acquire_ctx; |
57692c94 EA |
730 | int ret = 0; |
731 | ||
55a9b748 EA |
732 | trace_v3d_submit_cl_ioctl(&v3d->drm, args->rcl_start, args->rcl_end); |
733 | ||
bb3425ef | 734 | if (args->pad) |
26a4dc29 JSR |
735 | return -EINVAL; |
736 | ||
bb3425ef MW |
737 | if (args->flags && |
738 | args->flags & ~(DRM_V3D_SUBMIT_CL_FLUSH_CACHE | | |
739 | DRM_V3D_SUBMIT_EXTENSION)) { | |
455d56ce | 740 | DRM_INFO("invalid flags: %d\n", args->flags); |
57692c94 EA |
741 | return -EINVAL; |
742 | } | |
743 | ||
bb3425ef | 744 | if (args->flags & DRM_V3D_SUBMIT_EXTENSION) { |
e4165ae8 | 745 | ret = v3d_get_extensions(file_priv, args->extensions, &se); |
bb3425ef MW |
746 | if (ret) { |
747 | DRM_DEBUG("Failed to get extensions.\n"); | |
748 | return ret; | |
749 | } | |
750 | } | |
751 | ||
07c2a416 | 752 | ret = v3d_job_init(v3d, file_priv, (void *)&render, sizeof(*render), |
e4165ae8 | 753 | v3d_render_job_free, args->in_sync_rcl, &se, V3D_RENDER); |
07c2a416 MW |
754 | if (ret) |
755 | goto fail; | |
57692c94 | 756 | |
a783a09e EA |
757 | render->start = args->rcl_start; |
758 | render->end = args->rcl_end; | |
759 | INIT_LIST_HEAD(&render->unref_list); | |
760 | ||
a783a09e | 761 | if (args->bcl_start != args->bcl_end) { |
07c2a416 | 762 | ret = v3d_job_init(v3d, file_priv, (void *)&bin, sizeof(*bin), |
e4165ae8 | 763 | v3d_job_free, args->in_sync_bcl, &se, V3D_BIN); |
07c2a416 MW |
764 | if (ret) |
765 | goto fail; | |
57692c94 | 766 | |
a783a09e EA |
767 | bin->start = args->bcl_start; |
768 | bin->end = args->bcl_end; | |
769 | bin->qma = args->qma; | |
770 | bin->qms = args->qms; | |
771 | bin->qts = args->qts; | |
772 | bin->render = render; | |
773 | } | |
57692c94 | 774 | |
455d56ce | 775 | if (args->flags & DRM_V3D_SUBMIT_CL_FLUSH_CACHE) { |
07c2a416 | 776 | ret = v3d_job_init(v3d, file_priv, (void *)&clean_job, sizeof(*clean_job), |
75ad021f | 777 | v3d_job_free, 0, NULL, V3D_CACHE_CLEAN); |
07c2a416 | 778 | if (ret) |
455d56ce | 779 | goto fail; |
455d56ce ITQ |
780 | |
781 | last_job = clean_job; | |
782 | } else { | |
783 | last_job = &render->base; | |
784 | } | |
785 | ||
786 | ret = v3d_lookup_bos(dev, file_priv, last_job, | |
a783a09e | 787 | args->bo_handles, args->bo_handle_count); |
57692c94 EA |
788 | if (ret) |
789 | goto fail; | |
790 | ||
455d56ce | 791 | ret = v3d_lock_bo_reservations(last_job, &acquire_ctx); |
57692c94 EA |
792 | if (ret) |
793 | goto fail; | |
794 | ||
26a4dc29 JSR |
795 | if (args->perfmon_id) { |
796 | render->base.perfmon = v3d_perfmon_find(v3d_priv, | |
797 | args->perfmon_id); | |
798 | ||
799 | if (!render->base.perfmon) { | |
800 | ret = -ENOENT; | |
e57c1a3b | 801 | goto fail_perfmon; |
26a4dc29 JSR |
802 | } |
803 | } | |
804 | ||
7122b68b | 805 | mutex_lock(&v3d->sched_lock); |
a783a09e | 806 | if (bin) { |
26a4dc29 JSR |
807 | bin->base.perfmon = render->base.perfmon; |
808 | v3d_perfmon_get(bin->base.perfmon); | |
916044fa | 809 | v3d_push_job(&bin->base); |
57692c94 | 810 | |
da3208e8 DV |
811 | ret = drm_sched_job_add_dependency(&render->base.base, |
812 | dma_fence_get(bin->base.done_fence)); | |
dffa9b7a EA |
813 | if (ret) |
814 | goto fail_unreserve; | |
57692c94 EA |
815 | } |
816 | ||
916044fa | 817 | v3d_push_job(&render->base); |
455d56ce ITQ |
818 | |
819 | if (clean_job) { | |
820 | struct dma_fence *render_fence = | |
821 | dma_fence_get(render->base.done_fence); | |
da3208e8 DV |
822 | ret = drm_sched_job_add_dependency(&clean_job->base, |
823 | render_fence); | |
455d56ce ITQ |
824 | if (ret) |
825 | goto fail_unreserve; | |
26a4dc29 JSR |
826 | clean_job->perfmon = render->base.perfmon; |
827 | v3d_perfmon_get(clean_job->perfmon); | |
916044fa | 828 | v3d_push_job(clean_job); |
455d56ce ITQ |
829 | } |
830 | ||
7122b68b | 831 | mutex_unlock(&v3d->sched_lock); |
57692c94 | 832 | |
a783a09e | 833 | v3d_attach_fences_and_unlock_reservation(file_priv, |
455d56ce | 834 | last_job, |
d223f98f EA |
835 | &acquire_ctx, |
836 | args->out_sync, | |
e4165ae8 | 837 | &se, |
455d56ce | 838 | last_job->done_fence); |
57692c94 | 839 | |
a783a09e EA |
840 | if (bin) |
841 | v3d_job_put(&bin->base); | |
842 | v3d_job_put(&render->base); | |
455d56ce ITQ |
843 | if (clean_job) |
844 | v3d_job_put(clean_job); | |
57692c94 EA |
845 | |
846 | return 0; | |
847 | ||
848 | fail_unreserve: | |
7122b68b | 849 | mutex_unlock(&v3d->sched_lock); |
e57c1a3b | 850 | fail_perfmon: |
455d56ce ITQ |
851 | drm_gem_unlock_reservations(last_job->bo, |
852 | last_job->bo_count, &acquire_ctx); | |
57692c94 | 853 | fail: |
07c2a416 MW |
854 | v3d_job_cleanup((void *)bin); |
855 | v3d_job_cleanup((void *)render); | |
856 | v3d_job_cleanup(clean_job); | |
e4165ae8 | 857 | v3d_put_multisync_post_deps(&se); |
57692c94 EA |
858 | |
859 | return ret; | |
860 | } | |
861 | ||
1584f16c EA |
862 | /** |
863 | * v3d_submit_tfu_ioctl() - Submits a TFU (texture formatting) job to the V3D. | |
864 | * @dev: DRM device | |
865 | * @data: ioctl argument | |
866 | * @file_priv: DRM file for this fd | |
867 | * | |
868 | * Userspace provides the register setup for the TFU, which we don't | |
869 | * need to validate since the TFU is behind the MMU. | |
870 | */ | |
871 | int | |
872 | v3d_submit_tfu_ioctl(struct drm_device *dev, void *data, | |
873 | struct drm_file *file_priv) | |
874 | { | |
875 | struct v3d_dev *v3d = to_v3d_dev(dev); | |
1584f16c | 876 | struct drm_v3d_submit_tfu *args = data; |
e4165ae8 | 877 | struct v3d_submit_ext se = {0}; |
07c2a416 | 878 | struct v3d_tfu_job *job = NULL; |
1584f16c | 879 | struct ww_acquire_ctx acquire_ctx; |
1584f16c | 880 | int ret = 0; |
1584f16c | 881 | |
55a9b748 EA |
882 | trace_v3d_submit_tfu_ioctl(&v3d->drm, args->iia); |
883 | ||
bb3425ef MW |
884 | if (args->flags && !(args->flags & DRM_V3D_SUBMIT_EXTENSION)) { |
885 | DRM_DEBUG("invalid flags: %d\n", args->flags); | |
886 | return -EINVAL; | |
887 | } | |
888 | ||
889 | if (args->flags & DRM_V3D_SUBMIT_EXTENSION) { | |
e4165ae8 | 890 | ret = v3d_get_extensions(file_priv, args->extensions, &se); |
bb3425ef MW |
891 | if (ret) { |
892 | DRM_DEBUG("Failed to get extensions.\n"); | |
893 | return ret; | |
894 | } | |
895 | } | |
896 | ||
07c2a416 | 897 | ret = v3d_job_init(v3d, file_priv, (void *)&job, sizeof(*job), |
e4165ae8 | 898 | v3d_job_free, args->in_sync, &se, V3D_TFU); |
07c2a416 MW |
899 | if (ret) |
900 | goto fail; | |
1584f16c | 901 | |
a783a09e EA |
902 | job->base.bo = kcalloc(ARRAY_SIZE(args->bo_handles), |
903 | sizeof(*job->base.bo), GFP_KERNEL); | |
904 | if (!job->base.bo) { | |
07c2a416 MW |
905 | ret = -ENOMEM; |
906 | goto fail; | |
a783a09e | 907 | } |
1584f16c EA |
908 | |
909 | job->args = *args; | |
1584f16c EA |
910 | |
911 | spin_lock(&file_priv->table_lock); | |
a783a09e EA |
912 | for (job->base.bo_count = 0; |
913 | job->base.bo_count < ARRAY_SIZE(args->bo_handles); | |
914 | job->base.bo_count++) { | |
1584f16c EA |
915 | struct drm_gem_object *bo; |
916 | ||
a783a09e | 917 | if (!args->bo_handles[job->base.bo_count]) |
1584f16c EA |
918 | break; |
919 | ||
920 | bo = idr_find(&file_priv->object_idr, | |
a783a09e | 921 | args->bo_handles[job->base.bo_count]); |
1584f16c EA |
922 | if (!bo) { |
923 | DRM_DEBUG("Failed to look up GEM BO %d: %d\n", | |
a783a09e EA |
924 | job->base.bo_count, |
925 | args->bo_handles[job->base.bo_count]); | |
1584f16c EA |
926 | ret = -ENOENT; |
927 | spin_unlock(&file_priv->table_lock); | |
928 | goto fail; | |
929 | } | |
930 | drm_gem_object_get(bo); | |
a783a09e | 931 | job->base.bo[job->base.bo_count] = bo; |
1584f16c EA |
932 | } |
933 | spin_unlock(&file_priv->table_lock); | |
934 | ||
dffa9b7a | 935 | ret = v3d_lock_bo_reservations(&job->base, &acquire_ctx); |
1584f16c EA |
936 | if (ret) |
937 | goto fail; | |
938 | ||
939 | mutex_lock(&v3d->sched_lock); | |
916044fa | 940 | v3d_push_job(&job->base); |
1584f16c EA |
941 | mutex_unlock(&v3d->sched_lock); |
942 | ||
a783a09e EA |
943 | v3d_attach_fences_and_unlock_reservation(file_priv, |
944 | &job->base, &acquire_ctx, | |
d223f98f | 945 | args->out_sync, |
e4165ae8 | 946 | &se, |
d223f98f | 947 | job->base.done_fence); |
1584f16c | 948 | |
a783a09e | 949 | v3d_job_put(&job->base); |
1584f16c EA |
950 | |
951 | return 0; | |
952 | ||
1584f16c | 953 | fail: |
07c2a416 | 954 | v3d_job_cleanup((void *)job); |
e4165ae8 | 955 | v3d_put_multisync_post_deps(&se); |
1584f16c EA |
956 | |
957 | return ret; | |
958 | } | |
959 | ||
d223f98f EA |
960 | /** |
961 | * v3d_submit_csd_ioctl() - Submits a CSD (texture formatting) job to the V3D. | |
962 | * @dev: DRM device | |
963 | * @data: ioctl argument | |
964 | * @file_priv: DRM file for this fd | |
965 | * | |
966 | * Userspace provides the register setup for the CSD, which we don't | |
967 | * need to validate since the CSD is behind the MMU. | |
968 | */ | |
969 | int | |
970 | v3d_submit_csd_ioctl(struct drm_device *dev, void *data, | |
971 | struct drm_file *file_priv) | |
972 | { | |
973 | struct v3d_dev *v3d = to_v3d_dev(dev); | |
974 | struct v3d_file_priv *v3d_priv = file_priv->driver_priv; | |
975 | struct drm_v3d_submit_csd *args = data; | |
e4165ae8 | 976 | struct v3d_submit_ext se = {0}; |
07c2a416 MW |
977 | struct v3d_csd_job *job = NULL; |
978 | struct v3d_job *clean_job = NULL; | |
d223f98f EA |
979 | struct ww_acquire_ctx acquire_ctx; |
980 | int ret; | |
981 | ||
982 | trace_v3d_submit_csd_ioctl(&v3d->drm, args->cfg[5], args->cfg[6]); | |
983 | ||
bb3425ef MW |
984 | if (args->pad) |
985 | return -EINVAL; | |
986 | ||
d223f98f EA |
987 | if (!v3d_has_csd(v3d)) { |
988 | DRM_DEBUG("Attempting CSD submit on non-CSD hardware\n"); | |
989 | return -EINVAL; | |
990 | } | |
991 | ||
bb3425ef MW |
992 | if (args->flags && !(args->flags & DRM_V3D_SUBMIT_EXTENSION)) { |
993 | DRM_INFO("invalid flags: %d\n", args->flags); | |
994 | return -EINVAL; | |
995 | } | |
996 | ||
997 | if (args->flags & DRM_V3D_SUBMIT_EXTENSION) { | |
e4165ae8 | 998 | ret = v3d_get_extensions(file_priv, args->extensions, &se); |
bb3425ef MW |
999 | if (ret) { |
1000 | DRM_DEBUG("Failed to get extensions.\n"); | |
1001 | return ret; | |
1002 | } | |
1003 | } | |
1004 | ||
07c2a416 | 1005 | ret = v3d_job_init(v3d, file_priv, (void *)&job, sizeof(*job), |
e4165ae8 | 1006 | v3d_job_free, args->in_sync, &se, V3D_CSD); |
07c2a416 MW |
1007 | if (ret) |
1008 | goto fail; | |
d223f98f | 1009 | |
07c2a416 | 1010 | ret = v3d_job_init(v3d, file_priv, (void *)&clean_job, sizeof(*clean_job), |
75ad021f | 1011 | v3d_job_free, 0, NULL, V3D_CACHE_CLEAN); |
07c2a416 MW |
1012 | if (ret) |
1013 | goto fail; | |
d223f98f EA |
1014 | |
1015 | job->args = *args; | |
1016 | ||
1017 | ret = v3d_lookup_bos(dev, file_priv, clean_job, | |
1018 | args->bo_handles, args->bo_handle_count); | |
1019 | if (ret) | |
1020 | goto fail; | |
1021 | ||
dffa9b7a | 1022 | ret = v3d_lock_bo_reservations(clean_job, &acquire_ctx); |
d223f98f EA |
1023 | if (ret) |
1024 | goto fail; | |
1025 | ||
26a4dc29 JSR |
1026 | if (args->perfmon_id) { |
1027 | job->base.perfmon = v3d_perfmon_find(v3d_priv, | |
1028 | args->perfmon_id); | |
1029 | if (!job->base.perfmon) { | |
1030 | ret = -ENOENT; | |
e57c1a3b | 1031 | goto fail_perfmon; |
26a4dc29 JSR |
1032 | } |
1033 | } | |
1034 | ||
d223f98f | 1035 | mutex_lock(&v3d->sched_lock); |
916044fa | 1036 | v3d_push_job(&job->base); |
d223f98f | 1037 | |
da3208e8 DV |
1038 | ret = drm_sched_job_add_dependency(&clean_job->base, |
1039 | dma_fence_get(job->base.done_fence)); | |
dffa9b7a EA |
1040 | if (ret) |
1041 | goto fail_unreserve; | |
1042 | ||
916044fa | 1043 | v3d_push_job(clean_job); |
d223f98f EA |
1044 | mutex_unlock(&v3d->sched_lock); |
1045 | ||
1046 | v3d_attach_fences_and_unlock_reservation(file_priv, | |
1047 | clean_job, | |
1048 | &acquire_ctx, | |
1049 | args->out_sync, | |
e4165ae8 | 1050 | &se, |
d223f98f EA |
1051 | clean_job->done_fence); |
1052 | ||
1053 | v3d_job_put(&job->base); | |
1054 | v3d_job_put(clean_job); | |
1055 | ||
1056 | return 0; | |
1057 | ||
1058 | fail_unreserve: | |
1059 | mutex_unlock(&v3d->sched_lock); | |
e57c1a3b | 1060 | fail_perfmon: |
d223f98f EA |
1061 | drm_gem_unlock_reservations(clean_job->bo, clean_job->bo_count, |
1062 | &acquire_ctx); | |
1063 | fail: | |
07c2a416 | 1064 | v3d_job_cleanup((void *)job); |
916044fa | 1065 | v3d_job_cleanup(clean_job); |
e4165ae8 | 1066 | v3d_put_multisync_post_deps(&se); |
d223f98f EA |
1067 | |
1068 | return ret; | |
1069 | } | |
1070 | ||
57692c94 EA |
1071 | int |
1072 | v3d_gem_init(struct drm_device *dev) | |
1073 | { | |
1074 | struct v3d_dev *v3d = to_v3d_dev(dev); | |
1075 | u32 pt_size = 4096 * 1024; | |
1076 | int ret, i; | |
1077 | ||
1078 | for (i = 0; i < V3D_MAX_QUEUES; i++) | |
1079 | v3d->queue[i].fence_context = dma_fence_context_alloc(1); | |
1080 | ||
1081 | spin_lock_init(&v3d->mm_lock); | |
1082 | spin_lock_init(&v3d->job_lock); | |
1083 | mutex_init(&v3d->bo_lock); | |
1084 | mutex_init(&v3d->reset_lock); | |
7122b68b | 1085 | mutex_init(&v3d->sched_lock); |
d223f98f | 1086 | mutex_init(&v3d->cache_clean_lock); |
57692c94 EA |
1087 | |
1088 | /* Note: We don't allocate address 0. Various bits of HW | |
1089 | * treat 0 as special, such as the occlusion query counters | |
1090 | * where 0 means "disabled". | |
1091 | */ | |
1092 | drm_mm_init(&v3d->mm, 1, pt_size / sizeof(u32) - 1); | |
1093 | ||
bc662528 | 1094 | v3d->pt = dma_alloc_wc(v3d->drm.dev, pt_size, |
57692c94 EA |
1095 | &v3d->pt_paddr, |
1096 | GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO); | |
1097 | if (!v3d->pt) { | |
1098 | drm_mm_takedown(&v3d->mm); | |
bc662528 | 1099 | dev_err(v3d->drm.dev, |
223583dd | 1100 | "Failed to allocate page tables. Please ensure you have CMA enabled.\n"); |
57692c94 EA |
1101 | return -ENOMEM; |
1102 | } | |
1103 | ||
1104 | v3d_init_hw_state(v3d); | |
1105 | v3d_mmu_set_page_table(v3d); | |
1106 | ||
1107 | ret = v3d_sched_init(v3d); | |
1108 | if (ret) { | |
1109 | drm_mm_takedown(&v3d->mm); | |
bc662528 | 1110 | dma_free_coherent(v3d->drm.dev, 4096 * 1024, (void *)v3d->pt, |
57692c94 EA |
1111 | v3d->pt_paddr); |
1112 | } | |
1113 | ||
1114 | return 0; | |
1115 | } | |
1116 | ||
1117 | void | |
1118 | v3d_gem_destroy(struct drm_device *dev) | |
1119 | { | |
1120 | struct v3d_dev *v3d = to_v3d_dev(dev); | |
57692c94 EA |
1121 | |
1122 | v3d_sched_fini(v3d); | |
1123 | ||
a783a09e | 1124 | /* Waiting for jobs to finish would need to be done before |
57692c94 EA |
1125 | * unregistering V3D. |
1126 | */ | |
14d1d190 EA |
1127 | WARN_ON(v3d->bin_job); |
1128 | WARN_ON(v3d->render_job); | |
57692c94 EA |
1129 | |
1130 | drm_mm_takedown(&v3d->mm); | |
1131 | ||
bc662528 DV |
1132 | dma_free_coherent(v3d->drm.dev, 4096 * 1024, (void *)v3d->pt, |
1133 | v3d->pt_paddr); | |
57692c94 | 1134 | } |