// SPDX-License-Identifier: GPL-2.0-only
/*
 * Fence mechanism for dma-buf and to allow for asynchronous dma access
 *
 * Copyright (C) 2012 Canonical Ltd
 * Copyright (C) 2012 Texas Instruments
 *
 * Authors:
 * Rob Clark <robdclark@gmail.com>
 * Maarten Lankhorst <maarten.lankhorst@canonical.com>
 */

#include <linux/slab.h>
#include <linux/export.h>
#include <linux/atomic.h>
#include <linux/dma-fence.h>
#include <linux/sched/signal.h>
#include <linux/seq_file.h>

#define CREATE_TRACE_POINTS
#include <trace/events/dma_fence.h>

EXPORT_TRACEPOINT_SYMBOL(dma_fence_emit);
EXPORT_TRACEPOINT_SYMBOL(dma_fence_enable_signal);
EXPORT_TRACEPOINT_SYMBOL(dma_fence_signaled);
static DEFINE_SPINLOCK(dma_fence_stub_lock);
static struct dma_fence dma_fence_stub;

/*
 * fence context counter: each execution context should have its own
 * fence context, this allows checking if fences belong to the same
 * context or not. One device can have multiple separate contexts,
 * and they're used if some engine can run independently of another.
 */
static atomic64_t dma_fence_context_counter = ATOMIC64_INIT(1);
/**
 * DOC: DMA fences overview
 *
 * DMA fences, represented by &struct dma_fence, are the kernel internal
 * synchronization primitive for DMA operations like GPU rendering, video
 * encoding/decoding, or displaying buffers on a screen.
 *
 * A fence is initialized using dma_fence_init() and completed using
 * dma_fence_signal(). Fences are associated with a context, allocated through
 * dma_fence_context_alloc(), and all fences on the same context are
 * fully ordered.
 *
 * Since the purpose of fences is to facilitate cross-device and
 * cross-application synchronization, there are multiple ways to use one:
 *
 * - Individual fences can be exposed as a &sync_file, accessed as a file
 *   descriptor from userspace, created by calling sync_file_create(). This is
 *   called explicit fencing, since userspace passes around explicit
 *   synchronization points.
 *
 * - Some subsystems also have their own explicit fencing primitives, like
 *   &drm_syncobj. Compared to &sync_file, a &drm_syncobj allows the underlying
 *   fence to be updated.
 *
 * - Then there's also implicit fencing, where the synchronization points are
 *   implicitly passed around as part of shared &dma_buf instances. Such
 *   implicit fences are stored in &struct dma_resv through the
 *   &dma_buf.resv pointer.
 */
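/*
 * Illustrative sketch, not part of the original dma-fence.c: how a driver
 * might implement the lifecycle described above. All example_engine_* names
 * are hypothetical; only the dma_fence_*() calls are the real API.
 */
struct example_engine {
	spinlock_t fence_lock;		/* protects the fences of this context */
	u64 fence_context;		/* from dma_fence_context_alloc() */
	u64 fence_seqno;		/* monotonically increasing per context */
};

struct example_engine_job {
	struct dma_fence fence;
	struct example_engine *engine;
};

static const char *example_engine_fence_name(struct dma_fence *fence)
{
	return "example";
}

static const struct dma_fence_ops example_engine_fence_ops = {
	.get_driver_name = example_engine_fence_name,
	.get_timeline_name = example_engine_fence_name,
};

/* Once per engine/timeline, e.g. at probe time. */
static void example_engine_init(struct example_engine *engine)
{
	spin_lock_init(&engine->fence_lock);
	engine->fence_context = dma_fence_context_alloc(1);
	engine->fence_seqno = 0;
}

/* At job submission; the returned reference belongs to the caller. */
static struct dma_fence *example_engine_submit(struct example_engine_job *job)
{
	struct example_engine *engine = job->engine;

	dma_fence_init(&job->fence, &example_engine_fence_ops,
		       &engine->fence_lock, engine->fence_context,
		       ++engine->fence_seqno);
	/* publish the fence (sync_file, dma_resv, ...) and start the hardware */
	return dma_fence_get(&job->fence);
}

/* From the completion interrupt or worker. */
static void example_engine_job_done(struct example_engine_job *job)
{
	dma_fence_signal(&job->fence);
}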
/**
 * DOC: fence cross-driver contract
 *
 * Since &dma_fence provides a cross-driver contract, all drivers must follow
 * the same rules:
 *
 * * Fences must complete in a reasonable time. Fences which represent kernels
 *   and shaders submitted by userspace, which could run forever, must be backed
 *   up by timeout and gpu hang recovery code. Minimally that code must prevent
 *   further command submission and force complete all in-flight fences, e.g.
 *   when the driver or hardware do not support gpu reset, or if the gpu reset
 *   failed for some reason. Ideally the driver supports gpu recovery which only
 *   affects the offending userspace context, and no other userspace
 *   submissions.
 *
 * * Drivers may have different ideas of what completion within a reasonable
 *   time means. Some hang recovery code uses a fixed timeout, others a mix
 *   between observing forward progress and increasingly strict timeouts.
 *   Drivers should not try to second guess timeout handling of fences from
 *   other drivers.
 *
 * * To ensure there are no deadlocks of dma_fence_wait() against other locks,
 *   drivers should annotate all code required to reach dma_fence_signal(),
 *   which completes the fences, with dma_fence_begin_signalling() and
 *   dma_fence_end_signalling().
 *
 * * Drivers are allowed to call dma_fence_wait() while holding dma_resv_lock().
 *   This means any code required for fence completion cannot acquire a
 *   &dma_resv lock. Note that this also pulls in the entire established
 *   locking hierarchy around dma_resv_lock() and dma_resv_unlock().
 *
 * * Drivers are allowed to call dma_fence_wait() from their &shrinker
 *   callbacks. This means any code required for fence completion cannot
 *   allocate memory with GFP_KERNEL.
 *
 * * Drivers are allowed to call dma_fence_wait() from their &mmu_notifier
 *   respectively &mmu_interval_notifier callbacks. This means any code required
 *   for fence completion cannot allocate memory with GFP_NOFS or GFP_NOIO.
 *   Only GFP_ATOMIC is permissible, which might fail.
 *
 * Note that only GPU drivers have a reasonable excuse for both requiring
 * &mmu_interval_notifier and &shrinker callbacks at the same time as having to
 * track asynchronous compute work using &dma_fence. No driver outside of
 * drivers/gpu should ever call dma_fence_wait() in such contexts.
 */
static const char *dma_fence_stub_get_name(struct dma_fence *fence)
{
	return "stub";
}

static const struct dma_fence_ops dma_fence_stub_ops = {
	.get_driver_name = dma_fence_stub_get_name,
	.get_timeline_name = dma_fence_stub_get_name,
};

/**
 * dma_fence_get_stub - return a signaled fence
 *
 * Return a stub fence which is already signaled. The fence's
 * timestamp corresponds to the first time after boot this
 * function is called.
 */
struct dma_fence *dma_fence_get_stub(void)
{
	spin_lock(&dma_fence_stub_lock);
	if (!dma_fence_stub.ops) {
		dma_fence_init(&dma_fence_stub,
			       &dma_fence_stub_ops,
			       &dma_fence_stub_lock,
			       0, 0);

		set_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT,
			&dma_fence_stub.flags);

		dma_fence_signal_locked(&dma_fence_stub);
	}
	spin_unlock(&dma_fence_stub_lock);

	return dma_fence_get(&dma_fence_stub);
}
EXPORT_SYMBOL(dma_fence_get_stub);
/**
 * dma_fence_allocate_private_stub - return a private, signaled fence
 *
 * Return a newly allocated and signaled stub fence.
 */
struct dma_fence *dma_fence_allocate_private_stub(void)
{
	struct dma_fence *fence;

	fence = kzalloc(sizeof(*fence), GFP_KERNEL);
	if (fence == NULL)
		return ERR_PTR(-ENOMEM);

	dma_fence_init(fence,
		       &dma_fence_stub_ops,
		       &dma_fence_stub_lock,
		       0, 0);

	set_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT,
		&fence->flags);

	dma_fence_signal(fence);

	return fence;
}
EXPORT_SYMBOL(dma_fence_allocate_private_stub);
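/*
 * Illustrative sketch, not part of the original dma-fence.c: code that must
 * always hand a fence to its caller can fall back to the stubs above when
 * there is nothing to wait for. example_get_last_fence() is hypothetical.
 */
static struct dma_fence *example_get_last_fence(struct dma_fence *last)
{
	/* Nothing pending: return the shared, already signaled stub. */
	if (!last)
		return dma_fence_get_stub();

	return dma_fence_get(last);
}
/*
 * If a caller needs a fence it owns exclusively, it would instead use
 * dma_fence_allocate_private_stub() and check the result with IS_ERR().
 */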
/**
 * dma_fence_context_alloc - allocate an array of fence contexts
 * @num: number of contexts to allocate
 *
 * This function will return the first index of the @num fence contexts
 * allocated. The fence context is used for setting &dma_fence.context to a
 * unique number by passing the context to dma_fence_init().
 */
u64 dma_fence_context_alloc(unsigned num)
{
	WARN_ON(!num);
	return atomic64_fetch_add(num, &dma_fence_context_counter);
}
EXPORT_SYMBOL(dma_fence_context_alloc);
/**
 * DOC: fence signalling annotation
 *
 * Proving correctness of all the kernel code around &dma_fence through code
 * review and testing is tricky for a few reasons:
 *
 * * It is a cross-driver contract, and therefore all drivers must follow the
 *   same rules for lock nesting order, calling contexts for various functions
 *   and anything else significant for in-kernel interfaces. But it is also
 *   impossible to test all drivers in a single machine, hence brute-force N vs.
 *   N testing of all combinations is impossible. Even just limiting to the
 *   possible combinations is infeasible.
 *
 * * There is an enormous amount of driver code involved. For render drivers
 *   there's the tail of command submission, after fences are published,
 *   scheduler code, interrupt and workers to process job completion,
 *   and timeout, gpu reset and gpu hang recovery code. Plus for integration
 *   with core mm we have &mmu_notifier, respectively &mmu_interval_notifier,
 *   and &shrinker. For modesetting drivers there's the commit tail functions
 *   between when fences for an atomic modeset are published, and when the
 *   corresponding vblank completes, including any interrupt processing and
 *   related workers. Auditing all that code, across all drivers, is not
 *   feasible.
 *
 * * Due to how many other subsystems are involved and the locking hierarchies
 *   this pulls in there is extremely thin wiggle-room for driver-specific
 *   differences. &dma_fence interacts with almost all of the core memory
 *   handling through page fault handlers via &dma_resv, dma_resv_lock() and
 *   dma_resv_unlock(). On the other side it also interacts through all
 *   allocation sites through &mmu_notifier and &shrinker.
 *
 * Furthermore lockdep does not handle cross-release dependencies, which means
 * any deadlocks between dma_fence_wait() and dma_fence_signal() can't be caught
 * at runtime with some quick testing. The simplest example is one thread
 * waiting on a &dma_fence while holding a lock::
 *
 *     lock(A);
 *     dma_fence_wait(B);
 *     unlock(A);
 *
 * while the other thread is stuck trying to acquire the same lock, which
 * prevents it from signalling the fence the previous thread is stuck waiting
 * on::
 *
 *     lock(A);
 *     unlock(A);
 *     dma_fence_signal(B);
 *
 * By manually annotating all code relevant to signalling a &dma_fence we can
 * teach lockdep about these dependencies, which also helps with the validation
 * headache since now lockdep can check all the rules for us::
 *
 *    cookie = dma_fence_begin_signalling();
 *    lock(A);
 *    unlock(A);
 *    dma_fence_signal(B);
 *    dma_fence_end_signalling(cookie);
 *
 * For using dma_fence_begin_signalling() and dma_fence_end_signalling() to
 * annotate critical sections the following rules need to be observed:
 *
 * * All code necessary to complete a &dma_fence must be annotated, from the
 *   point where a fence is accessible to other threads, to the point where
 *   dma_fence_signal() is called. Un-annotated code can contain deadlock issues,
 *   and due to the very strict rules and many corner cases it is infeasible to
 *   catch these just with review or normal stress testing.
 *
 * * &struct dma_resv deserves a special note, since the readers are only
 *   protected by rcu. This means the signalling critical section starts as soon
 *   as the new fences are installed, even before dma_resv_unlock() is called.
 *
 * * The only exception are fast paths and opportunistic signalling code, which
 *   calls dma_fence_signal() purely as an optimization, but is not required to
 *   guarantee completion of a &dma_fence. The usual example is a wait IOCTL
 *   which calls dma_fence_signal(), while the mandatory completion path goes
 *   through a hardware interrupt and possible job completion worker.
 *
 * * To aid composability of code, the annotations can be freely nested, as long
 *   as the overall locking hierarchy is consistent. The annotations also work
 *   both in interrupt and process context. Due to implementation details this
 *   requires that callers pass an opaque cookie from
 *   dma_fence_begin_signalling() to dma_fence_end_signalling().
 *
 * * Validation against the cross driver contract is implemented by priming
 *   lockdep with the relevant hierarchy at boot-up. This means even just
 *   testing with a single device is enough to validate a driver, at least as
 *   far as deadlocks with dma_fence_wait() against dma_fence_signal() are
 *   concerned.
 */
#ifdef CONFIG_LOCKDEP
static struct lockdep_map dma_fence_lockdep_map = {
	.name = "dma_fence_map"
};

/**
 * dma_fence_begin_signalling - begin a critical DMA fence signalling section
 *
 * Drivers should use this to annotate the beginning of any code section
 * required to eventually complete &dma_fence by calling dma_fence_signal().
 *
 * The end of these critical sections is annotated with
 * dma_fence_end_signalling().
 *
 * Returns:
 *
 * Opaque cookie needed by the implementation, which needs to be passed to
 * dma_fence_end_signalling().
 */
bool dma_fence_begin_signalling(void)
{
	/* explicitly nesting ... */
	if (lock_is_held_type(&dma_fence_lockdep_map, 1))
		return true;

	/* rely on might_sleep check for soft/hardirq locks */
	if (in_atomic())
		return true;

	/* ... and non-recursive readlock */
	lock_acquire(&dma_fence_lockdep_map, 0, 0, 1, 1, NULL, _RET_IP_);

	return false;
}
EXPORT_SYMBOL(dma_fence_begin_signalling);

/**
 * dma_fence_end_signalling - end a critical DMA fence signalling section
 * @cookie: opaque cookie from dma_fence_begin_signalling()
 *
 * Closes a critical section annotation opened by dma_fence_begin_signalling().
 */
void dma_fence_end_signalling(bool cookie)
{
	if (cookie)
		return;

	lock_release(&dma_fence_lockdep_map, _RET_IP_);
}
EXPORT_SYMBOL(dma_fence_end_signalling);

void __dma_fence_might_wait(void)
{
	bool tmp;

	tmp = lock_is_held_type(&dma_fence_lockdep_map, 1);
	if (tmp)
		lock_release(&dma_fence_lockdep_map, _THIS_IP_);
	lock_map_acquire(&dma_fence_lockdep_map);
	lock_map_release(&dma_fence_lockdep_map);
	if (tmp)
		lock_acquire(&dma_fence_lockdep_map, 0, 0, 1, 1, NULL, _THIS_IP_);
}
#endif
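/*
 * Illustrative sketch, not part of the original dma-fence.c: a driver's
 * job-completion path annotated as a fence signalling critical section.
 * struct example_job and example_job_complete() are hypothetical.
 */
struct example_job {
	struct dma_fence *fence;
	/* ... driver specific state ... */
};

/* Called from the driver's completion worker or interrupt bottom half. */
static void example_job_complete(struct example_job *job)
{
	bool cookie;

	cookie = dma_fence_begin_signalling();
	/*
	 * Everything in here, including any locks taken, is what lockdep
	 * checks against all dma_fence_wait() callers in the system.
	 */
	dma_fence_signal(job->fence);
	dma_fence_end_signalling(cookie);

	dma_fence_put(job->fence);
}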
/**
 * dma_fence_signal_timestamp_locked - signal completion of a fence
 * @fence: the fence to signal
 * @timestamp: fence signal timestamp in kernel's CLOCK_MONOTONIC time domain
 *
 * Signal completion for software callbacks on a fence, this will unblock
 * dma_fence_wait() calls and run all the callbacks added with
 * dma_fence_add_callback(). Can be called multiple times, but since a fence
 * can only go from the unsignaled to the signaled state and not back, it will
 * only be effective the first time. Set the timestamp provided as the fence
 * signal timestamp.
 *
 * Unlike dma_fence_signal_timestamp(), this function must be called with
 * &dma_fence.lock held.
 *
 * Returns 0 on success and a negative error value when @fence has been
 * signalled already.
 */
int dma_fence_signal_timestamp_locked(struct dma_fence *fence,
				      ktime_t timestamp)
{
	struct dma_fence_cb *cur, *tmp;
	struct list_head cb_list;

	lockdep_assert_held(fence->lock);

	if (unlikely(test_and_set_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
				      &fence->flags)))
		return -EINVAL;

	/* Stash the cb_list before replacing it with the timestamp */
	list_replace(&fence->cb_list, &cb_list);

	fence->timestamp = timestamp;
	set_bit(DMA_FENCE_FLAG_TIMESTAMP_BIT, &fence->flags);
	trace_dma_fence_signaled(fence);

	list_for_each_entry_safe(cur, tmp, &cb_list, node) {
		INIT_LIST_HEAD(&cur->node);
		cur->func(fence, cur);
	}

	return 0;
}
EXPORT_SYMBOL(dma_fence_signal_timestamp_locked);

/**
 * dma_fence_signal_timestamp - signal completion of a fence
 * @fence: the fence to signal
 * @timestamp: fence signal timestamp in kernel's CLOCK_MONOTONIC time domain
 *
 * Signal completion for software callbacks on a fence, this will unblock
 * dma_fence_wait() calls and run all the callbacks added with
 * dma_fence_add_callback(). Can be called multiple times, but since a fence
 * can only go from the unsignaled to the signaled state and not back, it will
 * only be effective the first time. Set the timestamp provided as the fence
 * signal timestamp.
 *
 * Returns 0 on success and a negative error value when @fence has been
 * signalled already.
 */
int dma_fence_signal_timestamp(struct dma_fence *fence, ktime_t timestamp)
{
	unsigned long flags;
	int ret;

	if (!fence)
		return -EINVAL;

	spin_lock_irqsave(fence->lock, flags);
	ret = dma_fence_signal_timestamp_locked(fence, timestamp);
	spin_unlock_irqrestore(fence->lock, flags);

	return ret;
}
EXPORT_SYMBOL(dma_fence_signal_timestamp);

/**
 * dma_fence_signal_locked - signal completion of a fence
 * @fence: the fence to signal
 *
 * Signal completion for software callbacks on a fence, this will unblock
 * dma_fence_wait() calls and run all the callbacks added with
 * dma_fence_add_callback(). Can be called multiple times, but since a fence
 * can only go from the unsignaled to the signaled state and not back, it will
 * only be effective the first time.
 *
 * Unlike dma_fence_signal(), this function must be called with &dma_fence.lock
 * held.
 *
 * Returns 0 on success and a negative error value when @fence has been
 * signalled already.
 */
int dma_fence_signal_locked(struct dma_fence *fence)
{
	return dma_fence_signal_timestamp_locked(fence, ktime_get());
}
EXPORT_SYMBOL(dma_fence_signal_locked);
/**
 * dma_fence_signal - signal completion of a fence
 * @fence: the fence to signal
 *
 * Signal completion for software callbacks on a fence, this will unblock
 * dma_fence_wait() calls and run all the callbacks added with
 * dma_fence_add_callback(). Can be called multiple times, but since a fence
 * can only go from the unsignaled to the signaled state and not back, it will
 * only be effective the first time.
 *
 * Returns 0 on success and a negative error value when @fence has been
 * signalled already.
 */
int dma_fence_signal(struct dma_fence *fence)
{
	unsigned long flags;
	int ret;
	bool tmp;

	if (!fence)
		return -EINVAL;

	tmp = dma_fence_begin_signalling();

	spin_lock_irqsave(fence->lock, flags);
	ret = dma_fence_signal_timestamp_locked(fence, ktime_get());
	spin_unlock_irqrestore(fence->lock, flags);

	dma_fence_end_signalling(tmp);

	return ret;
}
EXPORT_SYMBOL(dma_fence_signal);
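/*
 * Illustrative sketch, not part of the original dma-fence.c: completing a
 * fence from an interrupt handler, recording an error before the signal when
 * the hardware reported one. struct example_irq_job is hypothetical.
 */
struct example_irq_job {
	struct dma_fence fence;
	bool hw_error;
};

static void example_irq_complete(struct example_irq_job *job)
{
	/* An error status must be set before the fence is signaled. */
	if (job->hw_error)
		dma_fence_set_error(&job->fence, -EIO);

	/* Safe from hard irq context; the fence lock is taken irqsave. */
	dma_fence_signal(&job->fence);
}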
/**
 * dma_fence_wait_timeout - sleep until the fence gets signaled
 * or until timeout elapses
 * @fence: the fence to wait on
 * @intr: if true, do an interruptible wait
 * @timeout: timeout value in jiffies, or MAX_SCHEDULE_TIMEOUT
 *
 * Returns -ERESTARTSYS if interrupted, 0 if the wait timed out, or the
 * remaining timeout in jiffies on success. Other error values may be
 * returned on custom implementations.
 *
 * Performs a synchronous wait on this fence. It is assumed the caller
 * directly or indirectly (buf-mgr between reservation and committing)
 * holds a reference to the fence, otherwise the fence might be
 * freed before return, resulting in undefined behavior.
 *
 * See also dma_fence_wait() and dma_fence_wait_any_timeout().
 */
signed long
dma_fence_wait_timeout(struct dma_fence *fence, bool intr, signed long timeout)
{
	signed long ret;

	if (WARN_ON(timeout < 0))
		return -EINVAL;

	might_sleep();

	__dma_fence_might_wait();

	dma_fence_enable_sw_signaling(fence);

	trace_dma_fence_wait_start(fence);
	if (fence->ops->wait)
		ret = fence->ops->wait(fence, intr, timeout);
	else
		ret = dma_fence_default_wait(fence, intr, timeout);
	trace_dma_fence_wait_end(fence);
	return ret;
}
EXPORT_SYMBOL(dma_fence_wait_timeout);
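/*
 * Illustrative sketch, not part of the original dma-fence.c: the usual way
 * to consume the three kinds of return value documented above.
 * example_wait_for_fence() is hypothetical.
 */
static int example_wait_for_fence(struct dma_fence *fence)
{
	signed long ret;

	ret = dma_fence_wait_timeout(fence, true, msecs_to_jiffies(100));
	if (ret == 0)
		return -ETIMEDOUT;	/* the timeout elapsed */
	if (ret < 0)
		return ret;		/* -ERESTARTSYS or a driver error */

	return 0;			/* signaled; ret is the time left */
}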
/**
 * dma_fence_release - default release function for fences
 * @kref: &dma_fence.refcount
 *
 * This is the default release function for &dma_fence. Drivers shouldn't call
 * this directly, but instead call dma_fence_put().
 */
void dma_fence_release(struct kref *kref)
{
	struct dma_fence *fence =
		container_of(kref, struct dma_fence, refcount);

	trace_dma_fence_destroy(fence);

	if (WARN(!list_empty(&fence->cb_list) &&
		 !test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags),
		 "Fence %s:%s:%llx:%llx released with pending signals!\n",
		 fence->ops->get_driver_name(fence),
		 fence->ops->get_timeline_name(fence),
		 fence->context, fence->seqno)) {
		unsigned long flags;

		/*
		 * Failed to signal before release, likely a refcounting issue.
		 *
		 * This should never happen, but if it does make sure that we
		 * don't leave chains dangling. We set the error flag first
		 * so that the callbacks know this signal is due to an error.
		 */
		spin_lock_irqsave(fence->lock, flags);
		fence->error = -EDEADLK;
		dma_fence_signal_locked(fence);
		spin_unlock_irqrestore(fence->lock, flags);
	}

	if (fence->ops->release)
		fence->ops->release(fence);
	else
		dma_fence_free(fence);
}
EXPORT_SYMBOL(dma_fence_release);
/**
 * dma_fence_free - default release function for &dma_fence.
 * @fence: fence to release
 *
 * This is the default implementation for &dma_fence_ops.release. It calls
 * kfree_rcu() on @fence.
 */
void dma_fence_free(struct dma_fence *fence)
{
	kfree_rcu(fence, rcu);
}
EXPORT_SYMBOL(dma_fence_free);
static bool __dma_fence_enable_signaling(struct dma_fence *fence)
{
	bool was_set;

	lockdep_assert_held(fence->lock);

	was_set = test_and_set_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT,
				   &fence->flags);

	if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags))
		return false;

	if (!was_set && fence->ops->enable_signaling) {
		trace_dma_fence_enable_signal(fence);

		if (!fence->ops->enable_signaling(fence)) {
			dma_fence_signal_locked(fence);
			return false;
		}
	}

	return true;
}
/**
 * dma_fence_enable_sw_signaling - enable signaling on fence
 * @fence: the fence to enable
 *
 * This will request for sw signaling to be enabled, to make the fence
 * complete as soon as possible. This calls &dma_fence_ops.enable_signaling
 * internally.
 */
void dma_fence_enable_sw_signaling(struct dma_fence *fence)
{
	unsigned long flags;

	spin_lock_irqsave(fence->lock, flags);
	__dma_fence_enable_signaling(fence);
	spin_unlock_irqrestore(fence->lock, flags);
}
EXPORT_SYMBOL(dma_fence_enable_sw_signaling);
/**
 * dma_fence_add_callback - add a callback to be called when the fence
 * is signaled
 * @fence: the fence to wait on
 * @cb: the callback to register
 * @func: the function to call
 *
 * Add a software callback to the fence. The caller should keep a reference to
 * the fence.
 *
 * @cb will be initialized by dma_fence_add_callback(), no initialization
 * by the caller is required. Any number of callbacks can be registered
 * to a fence, but a callback can only be registered to one fence at a time.
 *
 * If fence is already signaled, this function will return -ENOENT (and
 * *not* call the callback).
 *
 * Note that the callback can be called from an atomic context or irq context.
 *
 * Returns 0 in case of success, -ENOENT if the fence is already signaled
 * and -EINVAL in case of error.
 */
int dma_fence_add_callback(struct dma_fence *fence, struct dma_fence_cb *cb,
			   dma_fence_func_t func)
{
	unsigned long flags;
	int ret = 0;

	if (WARN_ON(!fence || !func))
		return -EINVAL;

	if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)) {
		INIT_LIST_HEAD(&cb->node);
		return -ENOENT;
	}

	spin_lock_irqsave(fence->lock, flags);

	if (__dma_fence_enable_signaling(fence)) {
		cb->func = func;
		list_add_tail(&cb->node, &fence->cb_list);
	} else {
		INIT_LIST_HEAD(&cb->node);
		ret = -ENOENT;
	}

	spin_unlock_irqrestore(fence->lock, flags);

	return ret;
}
EXPORT_SYMBOL(dma_fence_add_callback);
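/*
 * Illustrative sketch, not part of the original dma-fence.c: registering a
 * completion callback and handling the already-signaled case directly.
 * All example_req_* names are hypothetical.
 */
struct example_req {
	struct dma_fence_cb cb;
	/* ... driver specific state ... */
};

static void example_req_complete(struct example_req *req)
{
	/* ... driver specific completion handling ... */
}

/* May run in the atomic/irq context that signals the fence. */
static void example_req_fence_cb(struct dma_fence *fence,
				 struct dma_fence_cb *cb)
{
	struct example_req *req = container_of(cb, struct example_req, cb);

	example_req_complete(req);
}

static void example_req_queue(struct example_req *req, struct dma_fence *fence)
{
	int ret;

	ret = dma_fence_add_callback(fence, &req->cb, example_req_fence_cb);
	if (ret == -ENOENT)
		example_req_complete(req);	/* fence already signaled */
}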
/**
 * dma_fence_get_status - returns the status upon completion
 * @fence: the dma_fence to query
 *
 * This wraps dma_fence_get_status_locked() to return the error status
 * condition on a signaled fence. See dma_fence_get_status_locked() for more
 * details.
 *
 * Returns 0 if the fence has not yet been signaled, 1 if the fence has
 * been signaled without an error condition, or a negative error code
 * if the fence has been completed in error.
 */
int dma_fence_get_status(struct dma_fence *fence)
{
	unsigned long flags;
	int status;

	spin_lock_irqsave(fence->lock, flags);
	status = dma_fence_get_status_locked(fence);
	spin_unlock_irqrestore(fence->lock, flags);

	return status;
}
EXPORT_SYMBOL(dma_fence_get_status);
/**
 * dma_fence_remove_callback - remove a callback from the signaling list
 * @fence: the fence to wait on
 * @cb: the callback to remove
 *
 * Remove a previously queued callback from the fence. This function returns
 * true if the callback is successfully removed, or false if the fence has
 * already been signaled.
 *
 * *WARNING*:
 * Cancelling a callback should only be done if you really know what you're
 * doing, since deadlocks and race conditions could occur all too easily. For
 * this reason, it should only ever be done on hardware lockup recovery,
 * with a reference held to the fence.
 *
 * Behaviour is undefined if @cb has not been added to @fence using
 * dma_fence_add_callback() beforehand.
 */
bool
dma_fence_remove_callback(struct dma_fence *fence, struct dma_fence_cb *cb)
{
	unsigned long flags;
	bool ret;

	spin_lock_irqsave(fence->lock, flags);

	ret = !list_empty(&cb->node);
	if (ret)
		list_del_init(&cb->node);

	spin_unlock_irqrestore(fence->lock, flags);

	return ret;
}
EXPORT_SYMBOL(dma_fence_remove_callback);
struct default_wait_cb {
	struct dma_fence_cb base;
	struct task_struct *task;
};

static void
dma_fence_default_wait_cb(struct dma_fence *fence, struct dma_fence_cb *cb)
{
	struct default_wait_cb *wait =
		container_of(cb, struct default_wait_cb, base);

	wake_up_state(wait->task, TASK_NORMAL);
}

/**
 * dma_fence_default_wait - default sleep until the fence gets signaled
 * or until timeout elapses
 * @fence: the fence to wait on
 * @intr: if true, do an interruptible wait
 * @timeout: timeout value in jiffies, or MAX_SCHEDULE_TIMEOUT
 *
 * Returns -ERESTARTSYS if interrupted, 0 if the wait timed out, or the
 * remaining timeout in jiffies on success. If timeout is zero the value one is
 * returned if the fence is already signaled for consistency with other
 * functions taking a jiffies timeout.
 */
signed long
dma_fence_default_wait(struct dma_fence *fence, bool intr, signed long timeout)
{
	struct default_wait_cb cb;
	unsigned long flags;
	signed long ret = timeout ? timeout : 1;

	spin_lock_irqsave(fence->lock, flags);

	if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags))
		goto out;

	if (intr && signal_pending(current)) {
		ret = -ERESTARTSYS;
		goto out;
	}

	if (!timeout) {
		ret = 0;
		goto out;
	}

	cb.base.func = dma_fence_default_wait_cb;
	cb.task = current;
	list_add(&cb.base.node, &fence->cb_list);

	while (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags) && ret > 0) {
		if (intr)
			__set_current_state(TASK_INTERRUPTIBLE);
		else
			__set_current_state(TASK_UNINTERRUPTIBLE);
		spin_unlock_irqrestore(fence->lock, flags);

		ret = schedule_timeout(ret);

		spin_lock_irqsave(fence->lock, flags);
		if (ret > 0 && intr && signal_pending(current))
			ret = -ERESTARTSYS;
	}

	if (!list_empty(&cb.base.node))
		list_del(&cb.base.node);
	__set_current_state(TASK_RUNNING);

out:
	spin_unlock_irqrestore(fence->lock, flags);
	return ret;
}
EXPORT_SYMBOL(dma_fence_default_wait);
static bool
dma_fence_test_signaled_any(struct dma_fence **fences, uint32_t count,
			    uint32_t *idx)
{
	int i;

	for (i = 0; i < count; ++i) {
		struct dma_fence *fence = fences[i];
		if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)) {
			if (idx)
				*idx = i;
			return true;
		}
	}
	return false;
}

/**
 * dma_fence_wait_any_timeout - sleep until any fence gets signaled
 * or until timeout elapses
 * @fences: array of fences to wait on
 * @count: number of fences to wait on
 * @intr: if true, do an interruptible wait
 * @timeout: timeout value in jiffies, or MAX_SCHEDULE_TIMEOUT
 * @idx: used to store the first signaled fence index, meaningful only on
 *	positive return
 *
 * Returns -EINVAL on custom fence wait implementation, -ERESTARTSYS if
 * interrupted, 0 if the wait timed out, or the remaining timeout in jiffies
 * on success.
 *
 * Synchronously waits for the first fence in the array to be signaled. The
 * caller needs to hold a reference to all fences in the array, otherwise a
 * fence might be freed before return, resulting in undefined behavior.
 *
 * See also dma_fence_wait() and dma_fence_wait_timeout().
 */
signed long
dma_fence_wait_any_timeout(struct dma_fence **fences, uint32_t count,
			   bool intr, signed long timeout, uint32_t *idx)
{
	struct default_wait_cb *cb;
	signed long ret = timeout;
	unsigned i;

	if (WARN_ON(!fences || !count || timeout < 0))
		return -EINVAL;

	if (timeout == 0) {
		for (i = 0; i < count; ++i)
			if (dma_fence_is_signaled(fences[i])) {
				if (idx)
					*idx = i;
				return 1;
			}

		return 0;
	}

	cb = kcalloc(count, sizeof(struct default_wait_cb), GFP_KERNEL);
	if (cb == NULL) {
		ret = -ENOMEM;
		goto err_free_cb;
	}

	for (i = 0; i < count; ++i) {
		struct dma_fence *fence = fences[i];

		cb[i].task = current;
		if (dma_fence_add_callback(fence, &cb[i].base,
					   dma_fence_default_wait_cb)) {
			/* This fence is already signaled */
			if (idx)
				*idx = i;
			goto fence_rm_cb;
		}
	}

	while (ret > 0) {
		if (intr)
			set_current_state(TASK_INTERRUPTIBLE);
		else
			set_current_state(TASK_UNINTERRUPTIBLE);

		if (dma_fence_test_signaled_any(fences, count, idx))
			break;

		ret = schedule_timeout(ret);

		if (ret > 0 && intr && signal_pending(current))
			ret = -ERESTARTSYS;
	}

	__set_current_state(TASK_RUNNING);

fence_rm_cb:
	while (i-- > 0)
		dma_fence_remove_callback(fences[i], &cb[i].base);

err_free_cb:
	kfree(cb);

	return ret;
}
EXPORT_SYMBOL(dma_fence_wait_any_timeout);
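/*
 * Illustrative sketch, not part of the original dma-fence.c: waiting for the
 * first of several fences and using @idx to learn which one fired.
 * example_wait_for_any() is hypothetical.
 */
static int example_wait_for_any(struct dma_fence **fences, uint32_t count)
{
	uint32_t idx;
	signed long ret;

	ret = dma_fence_wait_any_timeout(fences, count, true,
					 msecs_to_jiffies(500), &idx);
	if (ret == 0)
		return -ETIMEDOUT;
	if (ret < 0)
		return ret;

	pr_debug("fence %u signaled first\n", idx);
	return 0;
}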
/**
 * dma_fence_describe - Dump fence description into seq_file
 * @fence: the fence to describe
 * @seq: the seq_file to put the textual description into
 *
 * Dump a textual description of the fence and its state into the seq_file.
 */
void dma_fence_describe(struct dma_fence *fence, struct seq_file *seq)
{
	seq_printf(seq, "%s %s seq %llu %ssignalled\n",
		   fence->ops->get_driver_name(fence),
		   fence->ops->get_timeline_name(fence), fence->seqno,
		   dma_fence_is_signaled(fence) ? "" : "un");
}
EXPORT_SYMBOL(dma_fence_describe);
/**
 * dma_fence_init - Initialize a custom fence.
 * @fence: the fence to initialize
 * @ops: the dma_fence_ops for operations on this fence
 * @lock: the irqsafe spinlock to use for locking this fence
 * @context: the execution context this fence is run on
 * @seqno: a linearly increasing sequence number for this context
 *
 * Initializes an allocated fence, the caller doesn't have to keep its
 * refcount after committing with this fence, but it will need to hold a
 * refcount again if &dma_fence_ops.enable_signaling gets called.
 *
 * context and seqno are used for easy comparison between fences, allowing
 * callers to check which fence is later by simply using dma_fence_later().
 */
void
dma_fence_init(struct dma_fence *fence, const struct dma_fence_ops *ops,
	       spinlock_t *lock, u64 context, u64 seqno)
{
	BUG_ON(!lock);
	BUG_ON(!ops || !ops->get_driver_name || !ops->get_timeline_name);

	kref_init(&fence->refcount);
	fence->ops = ops;
	INIT_LIST_HEAD(&fence->cb_list);
	fence->lock = lock;
	fence->context = context;
	fence->seqno = seqno;
	fence->flags = 0UL;
	fence->error = 0;

	trace_dma_fence_init(fence);
}
EXPORT_SYMBOL(dma_fence_init);
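/*
 * Illustrative sketch, not part of the original dma-fence.c: because fences
 * on one context are totally ordered by seqno, dma_fence_later() can pick the
 * one that completes last. example_pick_later() is hypothetical.
 */
static struct dma_fence *example_pick_later(struct dma_fence *a,
					    struct dma_fence *b)
{
	/* Only meaningful for fences initialized on the same context. */
	if (a->context != b->context)
		return NULL;

	return dma_fence_later(a, b);
}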