Commit | Line | Data |
---|---|---|
57692c94 EA |
1 | // SPDX-License-Identifier: GPL-2.0+ |
2 | /* Copyright (C) 2014-2018 Broadcom */ | |
3 | ||
4 | #include <drm/drmP.h> | |
5 | #include <drm/drm_syncobj.h> | |
6 | #include <linux/module.h> | |
7 | #include <linux/platform_device.h> | |
8 | #include <linux/pm_runtime.h> | |
9 | #include <linux/device.h> | |
10 | #include <linux/io.h> | |
11 | #include <linux/sched/signal.h> | |
12 | ||
13 | #include "uapi/drm/v3d_drm.h" | |
14 | #include "v3d_drv.h" | |
15 | #include "v3d_regs.h" | |
16 | #include "v3d_trace.h" | |
17 | ||
/* Programs per-core invariant register state at init/reset time. */
static void
v3d_init_core(struct v3d_dev *v3d, int core)
{
	/* Set OVRTMUOUT, which means that the texture sampler uniform
	 * configuration's tmu output type field is used, instead of
	 * using the hardware default behavior based on the texture
	 * type.  If you want the default behavior, you can still put
	 * "2" in the indirect texture state's output_type field.
	 */
	V3D_CORE_WRITE(core, V3D_CTL_MISCCFG, V3D_MISCCFG_OVRTMUOUT);

	/* Whenever we flush the L2T cache, we always want to flush
	 * the whole thing.
	 */
	V3D_CORE_WRITE(core, V3D_CTL_L2TFLSTA, 0);
	V3D_CORE_WRITE(core, V3D_CTL_L2TFLEND, ~0);
}
35 | ||
/* Sets invariant state for the HW. */
static void
v3d_init_hw_state(struct v3d_dev *v3d)
{
	/* NOTE: only core 0 is initialized throughout this file. */
	v3d_init_core(v3d, 0);
}
42 | ||
/* Requests that the GMP stop issuing AXI transactions, then waits
 * (100 time units per wait_for() -- presumably ms, confirm against
 * the driver's wait_for macro) for outstanding reads, writes, and
 * config accesses to drain.  On timeout we can only log an error.
 */
static void
v3d_idle_axi(struct v3d_dev *v3d, int core)
{
	V3D_CORE_WRITE(core, V3D_GMP_CFG, V3D_GMP_CFG_STOP_REQ);

	if (wait_for((V3D_CORE_READ(core, V3D_GMP_STATUS) &
		      (V3D_GMP_STATUS_RD_COUNT_MASK |
		       V3D_GMP_STATUS_WR_COUNT_MASK |
		       V3D_GMP_STATUS_CFG_BUSY)) == 0, 100)) {
		DRM_ERROR("Failed to wait for safe GMP shutdown\n");
	}
}
55 | ||
/* Requests a safe shutdown of the GCA and waits for the hardware to
 * acknowledge it.  No-op on V3D 4.1+, which has no GCA (see also
 * v3d_flush_l3()).
 */
static void
v3d_idle_gca(struct v3d_dev *v3d)
{
	if (v3d->ver >= 41)
		return;

	V3D_GCA_WRITE(V3D_GCA_SAFE_SHUTDOWN, V3D_GCA_SAFE_SHUTDOWN_EN);

	if (wait_for((V3D_GCA_READ(V3D_GCA_SAFE_SHUTDOWN_ACK) &
		      V3D_GCA_SAFE_SHUTDOWN_ACK_ACKED) ==
		     V3D_GCA_SAFE_SHUTDOWN_ACK_ACKED, 100)) {
		DRM_ERROR("Failed to wait for safe GCA shutdown\n");
	}
}
70 | ||
/* Pulses the bridge's software-init line to reset the V3D core, then
 * re-applies the invariant HW state via v3d_init_hw_state().  Which
 * SW_INIT register to pulse depends on the bridge major revision.
 */
static void
v3d_reset_v3d(struct v3d_dev *v3d)
{
	int version = V3D_BRIDGE_READ(V3D_TOP_GR_BRIDGE_REVISION);

	if (V3D_GET_FIELD(version, V3D_TOP_GR_BRIDGE_MAJOR) == 2) {
		V3D_BRIDGE_WRITE(V3D_TOP_GR_BRIDGE_SW_INIT_0,
				 V3D_TOP_GR_BRIDGE_SW_INIT_0_V3D_CLK_108_SW_INIT);
		V3D_BRIDGE_WRITE(V3D_TOP_GR_BRIDGE_SW_INIT_0, 0);

		/* GFXH-1383: The SW_INIT may cause a stray write to address 0
		 * of the unit, so reset it to its power-on value here.
		 */
		V3D_WRITE(V3D_HUB_AXICFG, V3D_HUB_AXICFG_MAX_LEN_MASK);
	} else {
		/* Only bridge majors 2 and 7 are expected here. */
		WARN_ON_ONCE(V3D_GET_FIELD(version,
					   V3D_TOP_GR_BRIDGE_MAJOR) != 7);
		V3D_BRIDGE_WRITE(V3D_TOP_GR_BRIDGE_SW_INIT_1,
				 V3D_TOP_GR_BRIDGE_SW_INIT_1_V3D_CLK_108_SW_INIT);
		V3D_BRIDGE_WRITE(V3D_TOP_GR_BRIDGE_SW_INIT_1, 0);
	}

	v3d_init_hw_state(v3d);
}
95 | ||
/* Full GPU reset path: quiesce the GCA, reset the core through the
 * bridge, then restore the MMU page table and interrupt setup that
 * the reset cleared.
 */
void
v3d_reset(struct v3d_dev *v3d)
{
	struct drm_device *dev = &v3d->drm;

	/* Logged at error level so resets are always visible in dmesg. */
	DRM_ERROR("Resetting GPU.\n");
	trace_v3d_reset_begin(dev);

	/* XXX: only needed for safe powerdown, not reset. */
	if (false)
		v3d_idle_axi(v3d, 0);

	v3d_idle_gca(v3d);
	v3d_reset_v3d(v3d);

	v3d_mmu_set_page_table(v3d);
	v3d_irq_reset(v3d);

	trace_v3d_reset_end(dev);
}
116 | ||
/* Flushes the L3 cache through the GCA.  V3D 4.1+ has no GCA, so
 * this is a no-op there.
 */
static void
v3d_flush_l3(struct v3d_dev *v3d)
{
	if (v3d->ver < 41) {
		u32 gca_ctrl = V3D_GCA_READ(V3D_GCA_CACHE_CTRL);

		V3D_GCA_WRITE(V3D_GCA_CACHE_CTRL,
			      gca_ctrl | V3D_GCA_CACHE_CTRL_FLUSH);

		/* The FLUSH bit is deasserted by hand only before
		 * V3D 3.3 -- presumably it self-clears on newer
		 * parts; confirm against the register spec.
		 */
		if (v3d->ver < 33) {
			V3D_GCA_WRITE(V3D_GCA_CACHE_CTRL,
				      gca_ctrl & ~V3D_GCA_CACHE_CTRL_FLUSH);
		}
	}
}
132 | ||
/* Invalidates the (read-only) L2C cache.  This was the L2 cache for
 * uniforms and instructions on V3D 3.2.
 */
static void
v3d_invalidate_l2c(struct v3d_dev *v3d, int core)
{
	/* The L2C cache only exists up through V3D 3.2. */
	if (v3d->ver > 32)
		return;

	V3D_CORE_WRITE(core, V3D_CTL_L2CACTL,
		       V3D_L2CACTL_L2CCLR |
		       V3D_L2CACTL_L2CENA);
}
146 | ||
/* Invalidates texture L2 cachelines */
static void
v3d_flush_l2t(struct v3d_dev *v3d, int core)
{
	/* While there is a busy bit (V3D_L2TCACTL_L2TFLS), we don't
	 * need to wait for completion before dispatching the job --
	 * L2T accesses will be stalled until the flush has completed.
	 */
	V3D_CORE_WRITE(core, V3D_CTL_L2TCACTL,
		       V3D_L2TCACTL_L2TFLS |
		       V3D_SET_FIELD(V3D_L2TCACTL_FLM_FLUSH, V3D_L2TCACTL_FLM));
}
159 | ||
/* Invalidates the slice caches.  These are read-only caches. */
static void
v3d_invalidate_slices(struct v3d_dev *v3d, int core)
{
	/* 0xf in each field invalidates every slice of that cache
	 * type (TMU vertex/data, uniform, instruction) -- assumed
	 * from the 4-bit masks; confirm against the register spec.
	 */
	V3D_CORE_WRITE(core, V3D_CTL_SLCACTL,
		       V3D_SET_FIELD(0xf, V3D_SLCACTL_TVCCS) |
		       V3D_SET_FIELD(0xf, V3D_SLCACTL_TDCCS) |
		       V3D_SET_FIELD(0xf, V3D_SLCACTL_UCC) |
		       V3D_SET_FIELD(0xf, V3D_SLCACTL_ICC));
}
170 | ||
/* Invalidates all GPU caches (L3, L2C, L2T, slice caches) on core 0. */
void
v3d_invalidate_caches(struct v3d_dev *v3d)
{
	/* Invalidate the caches from the outside in.  That way if
	 * another CL's concurrent use of nearby memory were to pull
	 * an invalidated cacheline back in, we wouldn't leave stale
	 * data in the inner cache.
	 */
	v3d_flush_l3(v3d);
	v3d_invalidate_l2c(v3d, 0);
	v3d_flush_l2t(v3d, 0);
	v3d_invalidate_slices(v3d, 0);
}
184 | ||
/* Sets @fence as the exclusive fence on each BO's reservation
 * object.  Callers hold the reservation locks (taken by
 * v3d_lock_bo_reservations()).
 */
static void
v3d_attach_object_fences(struct v3d_bo **bos, int bo_count,
			 struct dma_fence *fence)
{
	int i;

	for (i = 0; i < bo_count; i++) {
		/* XXX: Use shared fences for read-only objects. */
		reservation_object_add_excl_fence(bos[i]->base.resv, fence);
	}
}
196 | ||
/* Drops the per-BO reservation ww_mutexes taken by
 * v3d_lock_bo_reservations() and finishes the acquire context.
 */
static void
v3d_unlock_bo_reservations(struct v3d_bo **bos,
			   int bo_count,
			   struct ww_acquire_ctx *acquire_ctx)
{
	int i;

	for (i = 0; i < bo_count; i++)
		ww_mutex_unlock(&bos[i]->base.resv->lock);

	ww_acquire_fini(acquire_ctx);
}
209 | ||
/* Takes the reservation lock on all the BOs being referenced, so that
 * at queue submit time we can update the reservations.
 *
 * We don't lock the RCL the tile alloc/state BOs, or overflow memory
 * (all of which are on exec->unref_list).  They're entirely private
 * to v3d, so we don't attach dma-buf fences to them.
 *
 * Returns 0 with all locks held, or a negative errno with none held.
 */
static int
v3d_lock_bo_reservations(struct v3d_bo **bos,
			 int bo_count,
			 struct ww_acquire_ctx *acquire_ctx)
{
	int contended_lock = -1;
	int i, ret;

	ww_acquire_init(acquire_ctx, &reservation_ww_class);

retry:
	/* Wound/wait back-off: after an -EDEADLK we re-enter here and
	 * take the lock we lost on first, with the _slow variant,
	 * before retrying the rest.
	 */
	if (contended_lock != -1) {
		struct v3d_bo *bo = bos[contended_lock];

		ret = ww_mutex_lock_slow_interruptible(&bo->base.resv->lock,
						       acquire_ctx);
		if (ret) {
			ww_acquire_done(acquire_ctx);
			return ret;
		}
	}

	for (i = 0; i < bo_count; i++) {
		/* Already taken above in the retry path. */
		if (i == contended_lock)
			continue;

		ret = ww_mutex_lock_interruptible(&bos[i]->base.resv->lock,
						  acquire_ctx);
		if (ret) {
			int j;

			/* Back out everything locked so far this pass. */
			for (j = 0; j < i; j++)
				ww_mutex_unlock(&bos[j]->base.resv->lock);

			/* The contended lock was taken up front; drop
			 * it too if the loop above didn't reach it.
			 */
			if (contended_lock != -1 && contended_lock >= i) {
				struct v3d_bo *bo = bos[contended_lock];

				ww_mutex_unlock(&bo->base.resv->lock);
			}

			if (ret == -EDEADLK) {
				contended_lock = i;
				goto retry;
			}

			ww_acquire_done(acquire_ctx);
			return ret;
		}
	}

	ww_acquire_done(acquire_ctx);

	/* Reserve space for our shared (read-only) fence references,
	 * before we commit the CL to the hardware.
	 */
	for (i = 0; i < bo_count; i++) {
		ret = reservation_object_reserve_shared(bos[i]->base.resv, 1);
		if (ret) {
			v3d_unlock_bo_reservations(bos, bo_count,
						   acquire_ctx);
			return ret;
		}
	}

	return 0;
}
283 | ||
284 | /** | |
285 | * v3d_cl_lookup_bos() - Sets up exec->bo[] with the GEM objects | |
286 | * referenced by the job. | |
287 | * @dev: DRM device | |
288 | * @file_priv: DRM file for this fd | |
289 | * @exec: V3D job being set up | |
290 | * | |
291 | * The command validator needs to reference BOs by their index within | |
292 | * the submitted job's BO list. This does the validation of the job's | |
293 | * BO list and reference counting for the lifetime of the job. | |
294 | * | |
295 | * Note that this function doesn't need to unreference the BOs on | |
296 | * failure, because that will happen at v3d_exec_cleanup() time. | |
297 | */ | |
298 | static int | |
299 | v3d_cl_lookup_bos(struct drm_device *dev, | |
300 | struct drm_file *file_priv, | |
301 | struct drm_v3d_submit_cl *args, | |
302 | struct v3d_exec_info *exec) | |
303 | { | |
304 | u32 *handles; | |
305 | int ret = 0; | |
306 | int i; | |
307 | ||
308 | exec->bo_count = args->bo_handle_count; | |
309 | ||
310 | if (!exec->bo_count) { | |
311 | /* See comment on bo_index for why we have to check | |
312 | * this. | |
313 | */ | |
314 | DRM_DEBUG("Rendering requires BOs\n"); | |
315 | return -EINVAL; | |
316 | } | |
317 | ||
318 | exec->bo = kvmalloc_array(exec->bo_count, | |
319 | sizeof(struct drm_gem_cma_object *), | |
320 | GFP_KERNEL | __GFP_ZERO); | |
321 | if (!exec->bo) { | |
322 | DRM_DEBUG("Failed to allocate validated BO pointers\n"); | |
323 | return -ENOMEM; | |
324 | } | |
325 | ||
326 | handles = kvmalloc_array(exec->bo_count, sizeof(u32), GFP_KERNEL); | |
327 | if (!handles) { | |
328 | ret = -ENOMEM; | |
329 | DRM_DEBUG("Failed to allocate incoming GEM handles\n"); | |
330 | goto fail; | |
331 | } | |
332 | ||
333 | if (copy_from_user(handles, | |
334 | (void __user *)(uintptr_t)args->bo_handles, | |
335 | exec->bo_count * sizeof(u32))) { | |
336 | ret = -EFAULT; | |
337 | DRM_DEBUG("Failed to copy in GEM handles\n"); | |
338 | goto fail; | |
339 | } | |
340 | ||
341 | spin_lock(&file_priv->table_lock); | |
342 | for (i = 0; i < exec->bo_count; i++) { | |
343 | struct drm_gem_object *bo = idr_find(&file_priv->object_idr, | |
344 | handles[i]); | |
345 | if (!bo) { | |
346 | DRM_DEBUG("Failed to look up GEM BO %d: %d\n", | |
347 | i, handles[i]); | |
348 | ret = -ENOENT; | |
349 | spin_unlock(&file_priv->table_lock); | |
350 | goto fail; | |
351 | } | |
352 | drm_gem_object_get(bo); | |
353 | exec->bo[i] = to_v3d_bo(bo); | |
354 | } | |
355 | spin_unlock(&file_priv->table_lock); | |
356 | ||
357 | fail: | |
358 | kvfree(handles); | |
359 | return ret; | |
360 | } | |
361 | ||
362 | static void | |
363 | v3d_exec_cleanup(struct kref *ref) | |
364 | { | |
365 | struct v3d_exec_info *exec = container_of(ref, struct v3d_exec_info, | |
366 | refcount); | |
367 | struct v3d_dev *v3d = exec->v3d; | |
368 | unsigned int i; | |
369 | struct v3d_bo *bo, *save; | |
370 | ||
371 | dma_fence_put(exec->bin.in_fence); | |
372 | dma_fence_put(exec->render.in_fence); | |
373 | ||
374 | dma_fence_put(exec->bin.done_fence); | |
375 | dma_fence_put(exec->render.done_fence); | |
376 | ||
377 | dma_fence_put(exec->bin_done_fence); | |
34c2c4f6 | 378 | dma_fence_put(exec->render_done_fence); |
57692c94 EA |
379 | |
380 | for (i = 0; i < exec->bo_count; i++) | |
381 | drm_gem_object_put_unlocked(&exec->bo[i]->base); | |
382 | kvfree(exec->bo); | |
383 | ||
384 | list_for_each_entry_safe(bo, save, &exec->unref_list, unref_head) { | |
385 | drm_gem_object_put_unlocked(&bo->base); | |
386 | } | |
387 | ||
388 | pm_runtime_mark_last_busy(v3d->dev); | |
389 | pm_runtime_put_autosuspend(v3d->dev); | |
390 | ||
391 | kfree(exec); | |
392 | } | |
393 | ||
/* Drops a reference on a CL job; frees it via v3d_exec_cleanup()
 * when the last reference goes away.
 */
void v3d_exec_put(struct v3d_exec_info *exec)
{
	kref_put(&exec->refcount, v3d_exec_cleanup);
}
398 | ||
/* Final unref of a TFU job: drops fence and BO references and the
 * runtime PM reference taken at submit time, then frees the job.
 */
static void
v3d_tfu_job_cleanup(struct kref *ref)
{
	struct v3d_tfu_job *job = container_of(ref, struct v3d_tfu_job,
					       refcount);
	struct v3d_dev *v3d = job->v3d;
	unsigned int i;

	dma_fence_put(job->in_fence);
	dma_fence_put(job->done_fence);

	/* job->bo[] is a fixed-size array that may be only partially
	 * used; unused slots are NULL.
	 */
	for (i = 0; i < ARRAY_SIZE(job->bo); i++) {
		if (job->bo[i])
			drm_gem_object_put_unlocked(&job->bo[i]->base);
	}

	pm_runtime_mark_last_busy(v3d->dev);
	pm_runtime_put_autosuspend(v3d->dev);

	kfree(job);
}
420 | ||
/* Drops a reference on a TFU job; frees it via v3d_tfu_job_cleanup()
 * when the last reference goes away.
 */
void v3d_tfu_job_put(struct v3d_tfu_job *job)
{
	kref_put(&job->refcount, v3d_tfu_job_cleanup);
}
425 | ||
/* Waits on the fences of a BO's reservation object with a
 * user-supplied timeout.  The remaining timeout is written back to
 * userspace so a restarted ioctl doesn't wait the full period again.
 */
int
v3d_wait_bo_ioctl(struct drm_device *dev, void *data,
		  struct drm_file *file_priv)
{
	int ret;
	struct drm_v3d_wait_bo *args = data;
	ktime_t start = ktime_get();
	u64 delta_ns;
	unsigned long timeout_jiffies =
		nsecs_to_jiffies_timeout(args->timeout_ns);

	if (args->pad != 0)
		return -EINVAL;

	/* NOTE(review): the "true" presumably selects waiting on all
	 * fences, not just the exclusive one -- confirm against the
	 * drm_gem_reservation_object_wait() signature.
	 */
	ret = drm_gem_reservation_object_wait(file_priv, args->handle,
					      true, timeout_jiffies);

	/* Decrement the user's timeout, in case we got interrupted
	 * such that the ioctl will be restarted.
	 */
	delta_ns = ktime_to_ns(ktime_sub(ktime_get(), start));
	if (delta_ns < args->timeout_ns)
		args->timeout_ns -= delta_ns;
	else
		args->timeout_ns = 0;

	/* Asked to wait beyond the jiffie/scheduler precision? */
	if (ret == -ETIME && args->timeout_ns)
		ret = -EAGAIN;

	return ret;
}
458 | ||
/**
 * v3d_submit_cl_ioctl() - Submits a job (frame) to the V3D.
 * @dev: DRM device
 * @data: ioctl argument
 * @file_priv: DRM file for this fd
 *
 * This is the main entrypoint for userspace to submit a 3D frame to
 * the GPU.  Userspace provides the binner command list (if
 * applicable), and the kernel sets up the render command list to draw
 * to the framebuffer described in the ioctl, using the command lists
 * that the 3D engine's binner will produce.
 */
int
v3d_submit_cl_ioctl(struct drm_device *dev, void *data,
		    struct drm_file *file_priv)
{
	struct v3d_dev *v3d = to_v3d_dev(dev);
	struct v3d_file_priv *v3d_priv = file_priv->driver_priv;
	struct drm_v3d_submit_cl *args = data;
	struct v3d_exec_info *exec;
	struct ww_acquire_ctx acquire_ctx;
	struct drm_syncobj *sync_out;
	int ret = 0;

	trace_v3d_submit_cl_ioctl(&v3d->drm, args->rcl_start, args->rcl_end);

	if (args->pad != 0) {
		DRM_INFO("pad must be zero: %d\n", args->pad);
		return -EINVAL;
	}

	exec = kcalloc(1, sizeof(*exec), GFP_KERNEL);
	if (!exec)
		return -ENOMEM;

	/* Keep the device powered for the job's lifetime; the
	 * matching put is in v3d_exec_cleanup().
	 */
	ret = pm_runtime_get_sync(v3d->dev);
	if (ret < 0) {
		kfree(exec);
		return ret;
	}

	kref_init(&exec->refcount);

	/* NOTE(review): only -EINVAL is treated as fatal here --
	 * presumably so that a 0 syncobj handle means "no input
	 * fence"; confirm against drm_syncobj_find_fence() semantics.
	 */
	ret = drm_syncobj_find_fence(file_priv, args->in_sync_bcl,
				     0, 0, &exec->bin.in_fence);
	if (ret == -EINVAL)
		goto fail;

	ret = drm_syncobj_find_fence(file_priv, args->in_sync_rcl,
				     0, 0, &exec->render.in_fence);
	if (ret == -EINVAL)
		goto fail;

	exec->qma = args->qma;
	exec->qms = args->qms;
	exec->qts = args->qts;
	exec->bin.exec = exec;
	exec->bin.start = args->bcl_start;
	exec->bin.end = args->bcl_end;
	exec->render.exec = exec;
	exec->render.start = args->rcl_start;
	exec->render.end = args->rcl_end;
	exec->v3d = v3d;
	INIT_LIST_HEAD(&exec->unref_list);

	ret = v3d_cl_lookup_bos(dev, file_priv, args, exec);
	if (ret)
		goto fail;

	ret = v3d_lock_bo_reservations(exec->bo, exec->bo_count,
				       &acquire_ctx);
	if (ret)
		goto fail;

	/* sched_lock keeps this frame's bin and render pushes
	 * together relative to other submissions.
	 */
	mutex_lock(&v3d->sched_lock);
	/* An empty binner CL (start == end) means there is no bin
	 * job; only the render job is queued.
	 */
	if (exec->bin.start != exec->bin.end) {
		ret = drm_sched_job_init(&exec->bin.base,
					 &v3d_priv->sched_entity[V3D_BIN],
					 v3d_priv);
		if (ret)
			goto fail_unreserve;

		exec->bin_done_fence =
			dma_fence_get(&exec->bin.base.s_fence->finished);

		kref_get(&exec->refcount); /* put by scheduler job completion */
		drm_sched_entity_push_job(&exec->bin.base,
					  &v3d_priv->sched_entity[V3D_BIN]);
	}

	ret = drm_sched_job_init(&exec->render.base,
				 &v3d_priv->sched_entity[V3D_RENDER],
				 v3d_priv);
	if (ret)
		goto fail_unreserve;

	exec->render_done_fence =
		dma_fence_get(&exec->render.base.s_fence->finished);

	kref_get(&exec->refcount); /* put by scheduler job completion */
	drm_sched_entity_push_job(&exec->render.base,
				  &v3d_priv->sched_entity[V3D_RENDER]);
	mutex_unlock(&v3d->sched_lock);

	v3d_attach_object_fences(exec->bo, exec->bo_count,
				 exec->render_done_fence);

	v3d_unlock_bo_reservations(exec->bo, exec->bo_count, &acquire_ctx);

	/* Update the return sync object for the job with the render
	 * portion's done fence.
	 */
	sync_out = drm_syncobj_find(file_priv, args->out_sync);
	if (sync_out) {
		drm_syncobj_replace_fence(sync_out, exec->render_done_fence);
		drm_syncobj_put(sync_out);
	}

	v3d_exec_put(exec);

	return 0;

fail_unreserve:
	mutex_unlock(&v3d->sched_lock);
	v3d_unlock_bo_reservations(exec->bo, exec->bo_count, &acquire_ctx);
fail:
	v3d_exec_put(exec);

	return ret;
}
587 | ||
/**
 * v3d_submit_tfu_ioctl() - Submits a TFU (texture formatting) job to the V3D.
 * @dev: DRM device
 * @data: ioctl argument
 * @file_priv: DRM file for this fd
 *
 * Userspace provides the register setup for the TFU, which we don't
 * need to validate since the TFU is behind the MMU.
 */
int
v3d_submit_tfu_ioctl(struct drm_device *dev, void *data,
		     struct drm_file *file_priv)
{
	struct v3d_dev *v3d = to_v3d_dev(dev);
	struct v3d_file_priv *v3d_priv = file_priv->driver_priv;
	struct drm_v3d_submit_tfu *args = data;
	struct v3d_tfu_job *job;
	struct ww_acquire_ctx acquire_ctx;
	struct drm_syncobj *sync_out;
	struct dma_fence *sched_done_fence;
	int ret = 0;
	int bo_count;

	trace_v3d_submit_tfu_ioctl(&v3d->drm, args->iia);

	job = kcalloc(1, sizeof(*job), GFP_KERNEL);
	if (!job)
		return -ENOMEM;

	/* Keep the device powered for the job's lifetime; the
	 * matching put is in v3d_tfu_job_cleanup().
	 */
	ret = pm_runtime_get_sync(v3d->dev);
	if (ret < 0) {
		kfree(job);
		return ret;
	}

	kref_init(&job->refcount);

	/* Only -EINVAL (bad handle) is fatal, as in v3d_submit_cl_ioctl(). */
	ret = drm_syncobj_find_fence(file_priv, args->in_sync,
				     0, 0, &job->in_fence);
	if (ret == -EINVAL)
		goto fail;

	job->args = *args;
	job->v3d = v3d;

	/* Resolve the BO handle array; a 0 handle terminates the
	 * list, and unused job->bo[] slots stay NULL (kcalloc).
	 */
	spin_lock(&file_priv->table_lock);
	for (bo_count = 0; bo_count < ARRAY_SIZE(job->bo); bo_count++) {
		struct drm_gem_object *bo;

		if (!args->bo_handles[bo_count])
			break;

		bo = idr_find(&file_priv->object_idr,
			      args->bo_handles[bo_count]);
		if (!bo) {
			DRM_DEBUG("Failed to look up GEM BO %d: %d\n",
				  bo_count, args->bo_handles[bo_count]);
			ret = -ENOENT;
			spin_unlock(&file_priv->table_lock);
			goto fail;
		}
		drm_gem_object_get(bo);
		job->bo[bo_count] = to_v3d_bo(bo);
	}
	spin_unlock(&file_priv->table_lock);

	ret = v3d_lock_bo_reservations(job->bo, bo_count, &acquire_ctx);
	if (ret)
		goto fail;

	mutex_lock(&v3d->sched_lock);
	ret = drm_sched_job_init(&job->base,
				 &v3d_priv->sched_entity[V3D_TFU],
				 v3d_priv);
	if (ret)
		goto fail_unreserve;

	sched_done_fence = dma_fence_get(&job->base.s_fence->finished);

	kref_get(&job->refcount); /* put by scheduler job completion */
	drm_sched_entity_push_job(&job->base, &v3d_priv->sched_entity[V3D_TFU]);
	mutex_unlock(&v3d->sched_lock);

	v3d_attach_object_fences(job->bo, bo_count, sched_done_fence);

	v3d_unlock_bo_reservations(job->bo, bo_count, &acquire_ctx);

	/* Update the return sync object */
	sync_out = drm_syncobj_find(file_priv, args->out_sync);
	if (sync_out) {
		drm_syncobj_replace_fence(sync_out, sched_done_fence);
		drm_syncobj_put(sync_out);
	}
	dma_fence_put(sched_done_fence);

	v3d_tfu_job_put(job);

	return 0;

fail_unreserve:
	mutex_unlock(&v3d->sched_lock);
	v3d_unlock_bo_reservations(job->bo, bo_count, &acquire_ctx);
fail:
	v3d_tfu_job_put(job);

	return ret;
}
695 | ||
57692c94 EA |
696 | int |
697 | v3d_gem_init(struct drm_device *dev) | |
698 | { | |
699 | struct v3d_dev *v3d = to_v3d_dev(dev); | |
700 | u32 pt_size = 4096 * 1024; | |
701 | int ret, i; | |
702 | ||
703 | for (i = 0; i < V3D_MAX_QUEUES; i++) | |
704 | v3d->queue[i].fence_context = dma_fence_context_alloc(1); | |
705 | ||
706 | spin_lock_init(&v3d->mm_lock); | |
707 | spin_lock_init(&v3d->job_lock); | |
708 | mutex_init(&v3d->bo_lock); | |
709 | mutex_init(&v3d->reset_lock); | |
7122b68b | 710 | mutex_init(&v3d->sched_lock); |
57692c94 EA |
711 | |
712 | /* Note: We don't allocate address 0. Various bits of HW | |
713 | * treat 0 as special, such as the occlusion query counters | |
714 | * where 0 means "disabled". | |
715 | */ | |
716 | drm_mm_init(&v3d->mm, 1, pt_size / sizeof(u32) - 1); | |
717 | ||
718 | v3d->pt = dma_alloc_wc(v3d->dev, pt_size, | |
719 | &v3d->pt_paddr, | |
720 | GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO); | |
721 | if (!v3d->pt) { | |
722 | drm_mm_takedown(&v3d->mm); | |
723 | dev_err(v3d->dev, | |
724 | "Failed to allocate page tables. " | |
725 | "Please ensure you have CMA enabled.\n"); | |
726 | return -ENOMEM; | |
727 | } | |
728 | ||
729 | v3d_init_hw_state(v3d); | |
730 | v3d_mmu_set_page_table(v3d); | |
731 | ||
732 | ret = v3d_sched_init(v3d); | |
733 | if (ret) { | |
734 | drm_mm_takedown(&v3d->mm); | |
735 | dma_free_coherent(v3d->dev, 4096 * 1024, (void *)v3d->pt, | |
736 | v3d->pt_paddr); | |
737 | } | |
738 | ||
739 | return 0; | |
740 | } | |
741 | ||
/* Tears down what v3d_gem_init() set up: scheduler, address-space
 * manager, and the MMU page-table allocation.
 */
void
v3d_gem_destroy(struct drm_device *dev)
{
	struct v3d_dev *v3d = to_v3d_dev(dev);

	v3d_sched_fini(v3d);

	/* Waiting for exec to finish would need to be done before
	 * unregistering V3D.
	 */
	WARN_ON(v3d->bin_job);
	WARN_ON(v3d->render_job);

	drm_mm_takedown(&v3d->mm);

	/* 4096 * 1024 must match pt_size in v3d_gem_init(). */
	dma_free_coherent(v3d->dev, 4096 * 1024, (void *)v3d->pt, v3d->pt_paddr);
}