/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright © 2019 Intel Corporation
 */

#include <linux/debugobjects.h>

#include "gt/intel_context.h"
#include "gt/intel_engine_heartbeat.h"
#include "gt/intel_engine_pm.h"
#include "gt/intel_ring.h"

#include "i915_drv.h"
#include "i915_active.h"

/*
 * Active refs memory management
 *
 * To be more economical with memory, we reap all the i915_active trees as
 * they idle (when we know the active requests are inactive) and allocate the
 * nodes from a local slab cache to hopefully reduce the fragmentation.
 */
static struct kmem_cache *slab_cache;

struct active_node {
	struct rb_node node;
	struct i915_active_fence base;
	struct i915_active *ref;
	u64 timeline;
};

#define fetch_node(x) rb_entry(READ_ONCE(x), typeof(struct active_node), node)

static inline struct active_node *
node_from_active(struct i915_active_fence *active)
{
	return container_of(active, struct active_node, base);
}

#define take_preallocated_barriers(x) llist_del_all(&(x)->preallocated_barriers)

static inline bool is_barrier(const struct i915_active_fence *active)
{
	return IS_ERR(rcu_access_pointer(active->fence));
}

static inline struct llist_node *barrier_to_ll(struct active_node *node)
{
	GEM_BUG_ON(!is_barrier(&node->base));
	return (struct llist_node *)&node->base.cb.node;
}

static inline struct intel_engine_cs *
__barrier_to_engine(struct active_node *node)
{
	return (struct intel_engine_cs *)READ_ONCE(node->base.cb.node.prev);
}

static inline struct intel_engine_cs *
barrier_to_engine(struct active_node *node)
{
	GEM_BUG_ON(!is_barrier(&node->base));
	return __barrier_to_engine(node);
}

static inline struct active_node *barrier_from_ll(struct llist_node *x)
{
	return container_of((struct list_head *)x,
			    struct active_node, base.cb.node);
}

#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM) && IS_ENABLED(CONFIG_DEBUG_OBJECTS)

static void *active_debug_hint(void *addr)
{
	struct i915_active *ref = addr;

	return (void *)ref->active ?: (void *)ref->retire ?: (void *)ref;
}

static const struct debug_obj_descr active_debug_desc = {
	.name = "i915_active",
	.debug_hint = active_debug_hint,
};

static void debug_active_init(struct i915_active *ref)
{
	debug_object_init(ref, &active_debug_desc);
}

static void debug_active_activate(struct i915_active *ref)
{
	lockdep_assert_held(&ref->tree_lock);
	if (!atomic_read(&ref->count)) /* before the first inc */
		debug_object_activate(ref, &active_debug_desc);
}

static void debug_active_deactivate(struct i915_active *ref)
{
	lockdep_assert_held(&ref->tree_lock);
	if (!atomic_read(&ref->count)) /* after the last dec */
		debug_object_deactivate(ref, &active_debug_desc);
}

static void debug_active_fini(struct i915_active *ref)
{
	debug_object_free(ref, &active_debug_desc);
}

static void debug_active_assert(struct i915_active *ref)
{
	debug_object_assert_init(ref, &active_debug_desc);
}

#else

static inline void debug_active_init(struct i915_active *ref) { }
static inline void debug_active_activate(struct i915_active *ref) { }
static inline void debug_active_deactivate(struct i915_active *ref) { }
static inline void debug_active_fini(struct i915_active *ref) { }
static inline void debug_active_assert(struct i915_active *ref) { }

#endif

static void
__active_retire(struct i915_active *ref)
{
	struct rb_root root = RB_ROOT;
	struct active_node *it, *n;
	unsigned long flags;

	GEM_BUG_ON(i915_active_is_idle(ref));

	/* return the unused nodes to our slabcache -- flushing the allocator */
	if (!atomic_dec_and_lock_irqsave(&ref->count, &ref->tree_lock, flags))
		return;

	GEM_BUG_ON(rcu_access_pointer(ref->excl.fence));
	debug_active_deactivate(ref);

	/* Even if we have not used the cache, we may still have a barrier */
	if (!ref->cache)
		ref->cache = fetch_node(ref->tree.rb_node);

	/* Keep the MRU cached node for reuse */
	if (ref->cache) {
		/* Discard all other nodes in the tree */
		rb_erase(&ref->cache->node, &ref->tree);
		root = ref->tree;

		/* Rebuild the tree with only the cached node */
		rb_link_node(&ref->cache->node, NULL, &ref->tree.rb_node);
		rb_insert_color(&ref->cache->node, &ref->tree);
		GEM_BUG_ON(ref->tree.rb_node != &ref->cache->node);

		/* Make the cached node available for reuse with any timeline */
		ref->cache->timeline = 0; /* needs cmpxchg(u64) */
	}

	spin_unlock_irqrestore(&ref->tree_lock, flags);

	/* After the final retire, the entire struct may be freed */
	if (ref->retire)
		ref->retire(ref);

	/* ... except if you wait on it, you must manage your own references! */
	wake_up_var(ref);

	/* Finally free the discarded timeline tree */
	rbtree_postorder_for_each_entry_safe(it, n, &root, node) {
		GEM_BUG_ON(i915_active_fence_isset(&it->base));
		kmem_cache_free(slab_cache, it);
	}
}

static void
active_work(struct work_struct *wrk)
{
	struct i915_active *ref = container_of(wrk, typeof(*ref), work);

	GEM_BUG_ON(!atomic_read(&ref->count));
	if (atomic_add_unless(&ref->count, -1, 1))
		return;

	__active_retire(ref);
}

static void
active_retire(struct i915_active *ref)
{
	GEM_BUG_ON(!atomic_read(&ref->count));
	if (atomic_add_unless(&ref->count, -1, 1))
		return;

	if (ref->flags & I915_ACTIVE_RETIRE_SLEEPS) {
		queue_work(system_unbound_wq, &ref->work);
		return;
	}

	__active_retire(ref);
}

static inline struct dma_fence **
__active_fence_slot(struct i915_active_fence *active)
{
	return (struct dma_fence ** __force)&active->fence;
}

static inline bool
active_fence_cb(struct dma_fence *fence, struct dma_fence_cb *cb)
{
	struct i915_active_fence *active =
		container_of(cb, typeof(*active), cb);

	return cmpxchg(__active_fence_slot(active), fence, NULL) == fence;
}

static void
node_retire(struct dma_fence *fence, struct dma_fence_cb *cb)
{
	if (active_fence_cb(fence, cb))
		active_retire(container_of(cb, struct active_node, base.cb)->ref);
}

static void
excl_retire(struct dma_fence *fence, struct dma_fence_cb *cb)
{
	if (active_fence_cb(fence, cb))
		active_retire(container_of(cb, struct i915_active, excl.cb));
}

static struct active_node *__active_lookup(struct i915_active *ref, u64 idx)
{
	struct active_node *it;

	GEM_BUG_ON(idx == 0); /* 0 is the unordered timeline, rsvd for cache */

	/*
	 * We track the most recently used timeline to skip an rbtree search
	 * for the common case; under typical loads we never need the rbtree
	 * at all. We can reuse the last slot if it is empty, that is
	 * after the previous activity has been retired, or if it matches the
	 * current timeline.
	 */
	it = READ_ONCE(ref->cache);
	if (it) {
		u64 cached = READ_ONCE(it->timeline);

		/* Once claimed, this slot will only belong to this idx */
		if (cached == idx)
			return it;

		/*
		 * An unclaimed cache [.timeline=0] can only be claimed once.
		 *
		 * If the value is already non-zero, some other thread has
		 * claimed the cache and we know that it does not match our
		 * idx. If, and only if, the timeline is currently zero is it
		 * worth competing to claim it atomically for ourselves (for
		 * only the winner of that race will cmpxchg return the old
		 * value of 0).
		 */
		if (!cached && !cmpxchg64(&it->timeline, 0, idx))
			return it;
	}

	BUILD_BUG_ON(offsetof(typeof(*it), node));

	/* While active, the tree can only be built; not destroyed */
	GEM_BUG_ON(i915_active_is_idle(ref));

	it = fetch_node(ref->tree.rb_node);
	while (it) {
		if (it->timeline < idx) {
			it = fetch_node(it->node.rb_right);
		} else if (it->timeline > idx) {
			it = fetch_node(it->node.rb_left);
		} else {
			WRITE_ONCE(ref->cache, it);
			break;
		}
	}

	/* NB: If the tree rotated beneath us, we may miss our target. */
	return it;
}

static struct i915_active_fence *
active_instance(struct i915_active *ref, u64 idx)
{
	struct active_node *node;
	struct rb_node **p, *parent;

	node = __active_lookup(ref, idx);
	if (likely(node))
		return &node->base;

	spin_lock_irq(&ref->tree_lock);
	GEM_BUG_ON(i915_active_is_idle(ref));

	parent = NULL;
	p = &ref->tree.rb_node;
	while (*p) {
		parent = *p;

		node = rb_entry(parent, struct active_node, node);
		if (node->timeline == idx)
			goto out;

		if (node->timeline < idx)
			p = &parent->rb_right;
		else
			p = &parent->rb_left;
	}

	/*
	 * XXX: We should preallocate this before i915_active_ref() is ever
	 * called, but we cannot call into fs_reclaim() anyway, so use GFP_ATOMIC.
	 */
	node = kmem_cache_alloc(slab_cache, GFP_ATOMIC);
	if (!node)
		goto out;

	__i915_active_fence_init(&node->base, NULL, node_retire);
	node->ref = ref;
	node->timeline = idx;

	rb_link_node(&node->node, parent, p);
	rb_insert_color(&node->node, &ref->tree);

out:
	WRITE_ONCE(ref->cache, node);
	spin_unlock_irq(&ref->tree_lock);

	return &node->base;
}

void __i915_active_init(struct i915_active *ref,
			int (*active)(struct i915_active *ref),
			void (*retire)(struct i915_active *ref),
			unsigned long flags,
			struct lock_class_key *mkey,
			struct lock_class_key *wkey)
{
	debug_active_init(ref);

	ref->flags = flags;
	ref->active = active;
	ref->retire = retire;

	spin_lock_init(&ref->tree_lock);
	ref->tree = RB_ROOT;
	ref->cache = NULL;

	init_llist_head(&ref->preallocated_barriers);
	atomic_set(&ref->count, 0);
	__mutex_init(&ref->mutex, "i915_active", mkey);
	__i915_active_fence_init(&ref->excl, NULL, excl_retire);
	INIT_WORK(&ref->work, active_work);
#if IS_ENABLED(CONFIG_LOCKDEP)
	lockdep_init_map(&ref->work.lockdep_map, "i915_active.work", wkey, 0);
#endif
}

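/*
 * Usage sketch (illustrative only, not part of the driver): a typical
 * lifecycle for an embedded i915_active, assuming the i915_active_init()
 * and i915_active_wait() wrappers from i915_active.h and hypothetical
 * foo_active()/foo_retire() callbacks. Error handling is elided.
 *
 *	i915_active_init(&foo->active, foo_active, foo_retire, 0);
 *
 *	err = i915_active_add_request(&foo->active, rq);
 *	...
 *	err = i915_active_wait(&foo->active);
 *	i915_active_fini(&foo->active);
 */
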
static bool ____active_del_barrier(struct i915_active *ref,
				   struct active_node *node,
				   struct intel_engine_cs *engine)
{
	struct llist_node *head = NULL, *tail = NULL;
	struct llist_node *pos, *next;

	GEM_BUG_ON(node->timeline != engine->kernel_context->timeline->fence_context);

	/*
	 * Rebuild the llist excluding our node. We may perform this
	 * outside of the kernel_context timeline mutex and so someone
	 * else may be manipulating the engine->barrier_tasks, in
	 * which case either we or they will be upset :)
	 *
	 * A second __active_del_barrier() will report failure to claim
	 * the active_node and the caller will just shrug and know not to
	 * claim ownership of its node.
	 *
	 * A concurrent i915_request_add_active_barriers() will miss adding
	 * any of the tasks, but we will try again on the next -- and since
	 * we are actively using the barrier, we know that there will be
	 * at least another opportunity when we idle.
	 */
	llist_for_each_safe(pos, next, llist_del_all(&engine->barrier_tasks)) {
		if (node == barrier_from_ll(pos)) {
			node = NULL;
			continue;
		}

		pos->next = head;
		head = pos;
		if (!tail)
			tail = pos;
	}
	if (head)
		llist_add_batch(head, tail, &engine->barrier_tasks);

	return !node;
}

static bool
__active_del_barrier(struct i915_active *ref, struct active_node *node)
{
	return ____active_del_barrier(ref, node, barrier_to_engine(node));
}

static bool
replace_barrier(struct i915_active *ref, struct i915_active_fence *active)
{
	if (!is_barrier(active)) /* proto-node used by our idle barrier? */
		return false;

	/*
	 * This request is on the kernel_context timeline, and so
	 * we can use it to substitute for the pending idle-barrier
	 * request that we want to emit on the kernel_context.
	 */
	__active_del_barrier(ref, node_from_active(active));
	return true;
}

int i915_active_add_request(struct i915_active *ref, struct i915_request *rq)
{
	struct dma_fence *fence = &rq->fence;
	struct i915_active_fence *active;
	int err;

	/* Prevent reaping in case we malloc/wait while building the tree */
	err = i915_active_acquire(ref);
	if (err)
		return err;

	active = active_instance(ref, i915_request_timeline(rq)->fence_context);
	if (!active) {
		err = -ENOMEM;
		goto out;
	}

	if (replace_barrier(ref, active)) {
		RCU_INIT_POINTER(active->fence, NULL);
		atomic_dec(&ref->count);
	}
	if (!__i915_active_fence_set(active, fence))
		__i915_active_acquire(ref);

out:
	i915_active_release(ref);
	return err;
}

static struct dma_fence *
__i915_active_set_fence(struct i915_active *ref,
			struct i915_active_fence *active,
			struct dma_fence *fence)
{
	struct dma_fence *prev;

	if (replace_barrier(ref, active)) {
		RCU_INIT_POINTER(active->fence, fence);
		return NULL;
	}

	rcu_read_lock();
	prev = __i915_active_fence_set(active, fence);
	if (prev)
		prev = dma_fence_get_rcu(prev);
	else
		__i915_active_acquire(ref);
	rcu_read_unlock();

	return prev;
}

struct dma_fence *
i915_active_set_exclusive(struct i915_active *ref, struct dma_fence *f)
{
	/* We expect the caller to manage the exclusive timeline ordering */
	return __i915_active_set_fence(ref, &ref->excl, f);
}

bool i915_active_acquire_if_busy(struct i915_active *ref)
{
	debug_active_assert(ref);
	return atomic_add_unless(&ref->count, 1, 0);
}

static void __i915_active_activate(struct i915_active *ref)
{
	spin_lock_irq(&ref->tree_lock); /* __active_retire() */
	if (!atomic_fetch_inc(&ref->count))
		debug_active_activate(ref);
	spin_unlock_irq(&ref->tree_lock);
}

int i915_active_acquire(struct i915_active *ref)
{
	int err;

	if (i915_active_acquire_if_busy(ref))
		return 0;

	if (!ref->active) {
		__i915_active_activate(ref);
		return 0;
	}

	err = mutex_lock_interruptible(&ref->mutex);
	if (err)
		return err;

	if (likely(!i915_active_acquire_if_busy(ref))) {
		err = ref->active(ref);
		if (!err)
			__i915_active_activate(ref);
	}

	mutex_unlock(&ref->mutex);

	return err;
}

int i915_active_acquire_for_context(struct i915_active *ref, u64 idx)
{
	struct i915_active_fence *active;
	int err;

	err = i915_active_acquire(ref);
	if (err)
		return err;

	active = active_instance(ref, idx);
	if (!active) {
		i915_active_release(ref);
		return -ENOMEM;
	}

	return 0; /* return with active ref */
}

void i915_active_release(struct i915_active *ref)
{
	debug_active_assert(ref);
	active_retire(ref);
}

static void enable_signaling(struct i915_active_fence *active)
{
	struct dma_fence *fence;

	if (unlikely(is_barrier(active)))
		return;

	fence = i915_active_fence_get(active);
	if (!fence)
		return;

	dma_fence_enable_sw_signaling(fence);
	dma_fence_put(fence);
}

static int flush_barrier(struct active_node *it)
{
	struct intel_engine_cs *engine;

	if (likely(!is_barrier(&it->base)))
		return 0;

	engine = __barrier_to_engine(it);
	smp_rmb(); /* serialise with add_active_barriers */
	if (!is_barrier(&it->base))
		return 0;

	return intel_engine_flush_barriers(engine);
}

static int flush_lazy_signals(struct i915_active *ref)
{
	struct active_node *it, *n;
	int err = 0;

	enable_signaling(&ref->excl);
	rbtree_postorder_for_each_entry_safe(it, n, &ref->tree, node) {
		err = flush_barrier(it); /* unconnected idle barrier? */
		if (err)
			break;

		enable_signaling(&it->base);
	}

	return err;
}

int __i915_active_wait(struct i915_active *ref, int state)
{
	might_sleep();

	/* Any fence added after the wait begins will not be auto-signaled */
	if (i915_active_acquire_if_busy(ref)) {
		int err;

		err = flush_lazy_signals(ref);
		i915_active_release(ref);
		if (err)
			return err;

		if (___wait_var_event(ref, i915_active_is_idle(ref),
				      state, 0, 0, schedule()))
			return -EINTR;
	}

	/*
	 * After the wait is complete, the caller may free the active.
	 * We have to flush any concurrent retirement before returning.
	 */
	flush_work(&ref->work);
	return 0;
}

static int __await_active(struct i915_active_fence *active,
			  int (*fn)(void *arg, struct dma_fence *fence),
			  void *arg)
{
	struct dma_fence *fence;

	if (is_barrier(active)) /* XXX flush the barrier? */
		return 0;

	fence = i915_active_fence_get(active);
	if (fence) {
		int err;

		err = fn(arg, fence);
		dma_fence_put(fence);
		if (err < 0)
			return err;
	}

	return 0;
}

struct wait_barrier {
	struct wait_queue_entry base;
	struct i915_active *ref;
};

static int
barrier_wake(wait_queue_entry_t *wq, unsigned int mode, int flags, void *key)
{
	struct wait_barrier *wb = container_of(wq, typeof(*wb), base);

	if (i915_active_is_idle(wb->ref)) {
		list_del(&wq->entry);
		i915_sw_fence_complete(wq->private);
		kfree(wq);
	}

	return 0;
}

static int __await_barrier(struct i915_active *ref, struct i915_sw_fence *fence)
{
	struct wait_barrier *wb;

	wb = kmalloc(sizeof(*wb), GFP_KERNEL);
	if (unlikely(!wb))
		return -ENOMEM;

	GEM_BUG_ON(i915_active_is_idle(ref));
	if (!i915_sw_fence_await(fence)) {
		kfree(wb);
		return -EINVAL;
	}

	wb->base.flags = 0;
	wb->base.func = barrier_wake;
	wb->base.private = fence;
	wb->ref = ref;

	add_wait_queue(__var_waitqueue(ref), &wb->base);
	return 0;
}

static int await_active(struct i915_active *ref,
			unsigned int flags,
			int (*fn)(void *arg, struct dma_fence *fence),
			void *arg, struct i915_sw_fence *barrier)
{
	int err = 0;

	if (!i915_active_acquire_if_busy(ref))
		return 0;

	if (flags & I915_ACTIVE_AWAIT_EXCL &&
	    rcu_access_pointer(ref->excl.fence)) {
		err = __await_active(&ref->excl, fn, arg);
		if (err)
			goto out;
	}

	if (flags & I915_ACTIVE_AWAIT_ACTIVE) {
		struct active_node *it, *n;

		rbtree_postorder_for_each_entry_safe(it, n, &ref->tree, node) {
			err = __await_active(&it->base, fn, arg);
			if (err)
				goto out;
		}
	}

	if (flags & I915_ACTIVE_AWAIT_BARRIER) {
		err = flush_lazy_signals(ref);
		if (err)
			goto out;

		err = __await_barrier(ref, barrier);
		if (err)
			goto out;
	}

out:
	i915_active_release(ref);
	return err;
}

static int rq_await_fence(void *arg, struct dma_fence *fence)
{
	return i915_request_await_dma_fence(arg, fence);
}

int i915_request_await_active(struct i915_request *rq,
			      struct i915_active *ref,
			      unsigned int flags)
{
	return await_active(ref, flags, rq_await_fence, rq, &rq->submit);
}

static int sw_await_fence(void *arg, struct dma_fence *fence)
{
	return i915_sw_fence_await_dma_fence(arg, fence, 0,
					     GFP_NOWAIT | __GFP_NOWARN);
}

int i915_sw_fence_await_active(struct i915_sw_fence *fence,
			       struct i915_active *ref,
			       unsigned int flags)
{
	return await_active(ref, flags, sw_await_fence, fence, fence);
}

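/*
 * Usage sketch (illustrative only): ordering a new request after everything
 * tracked by an i915_active, using the await helpers above. The flags pick
 * which fences are awaited; the combination shown here is just an example.
 *
 *	err = i915_request_await_active(rq, &foo->active,
 *					I915_ACTIVE_AWAIT_EXCL |
 *					I915_ACTIVE_AWAIT_ACTIVE);
 *	if (err)
 *		return err;
 *	i915_request_add(rq);
 */
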
void i915_active_fini(struct i915_active *ref)
{
	debug_active_fini(ref);
	GEM_BUG_ON(atomic_read(&ref->count));
	GEM_BUG_ON(work_pending(&ref->work));
	mutex_destroy(&ref->mutex);

	if (ref->cache)
		kmem_cache_free(slab_cache, ref->cache);
}

static inline bool is_idle_barrier(struct active_node *node, u64 idx)
{
	return node->timeline == idx && !i915_active_fence_isset(&node->base);
}

static struct active_node *reuse_idle_barrier(struct i915_active *ref, u64 idx)
{
	struct rb_node *prev, *p;

	if (RB_EMPTY_ROOT(&ref->tree))
		return NULL;

	GEM_BUG_ON(i915_active_is_idle(ref));

	/*
	 * Try to reuse any existing barrier nodes already allocated for this
	 * i915_active, due to overlapping active phases there is likely a
	 * node kept alive (as we reuse before parking). We prefer to reuse
	 * completely idle barriers (less hassle in manipulating the llists),
	 * but otherwise any will do.
	 */
	if (ref->cache && is_idle_barrier(ref->cache, idx)) {
		p = &ref->cache->node;
		goto match;
	}

	prev = NULL;
	p = ref->tree.rb_node;
	while (p) {
		struct active_node *node =
			rb_entry(p, struct active_node, node);

		if (is_idle_barrier(node, idx))
			goto match;

		prev = p;
		if (node->timeline < idx)
			p = READ_ONCE(p->rb_right);
		else
			p = READ_ONCE(p->rb_left);
	}

	/*
	 * No quick match, but we did find the leftmost rb_node for the
	 * kernel_context. Walk the rb_tree in-order to see if there were
	 * any idle-barriers on this timeline that we missed, or just use
	 * the first pending barrier.
	 */
	for (p = prev; p; p = rb_next(p)) {
		struct active_node *node =
			rb_entry(p, struct active_node, node);
		struct intel_engine_cs *engine;

		if (node->timeline > idx)
			break;

		if (node->timeline < idx)
			continue;

		if (is_idle_barrier(node, idx))
			goto match;

		/*
		 * The list of pending barriers is protected by the
		 * kernel_context timeline, which notably we do not hold
		 * here. i915_request_add_active_barriers() may consume
		 * the barrier before we claim it, so we have to check
		 * for success.
		 */
		engine = __barrier_to_engine(node);
		smp_rmb(); /* serialise with add_active_barriers */
		if (is_barrier(&node->base) &&
		    ____active_del_barrier(ref, node, engine))
			goto match;
	}

	return NULL;

match:
	spin_lock_irq(&ref->tree_lock);
	rb_erase(p, &ref->tree); /* Hide from waits and sibling allocations */
	if (p == &ref->cache->node)
		WRITE_ONCE(ref->cache, NULL);
	spin_unlock_irq(&ref->tree_lock);

	return rb_entry(p, struct active_node, node);
}

int i915_active_acquire_preallocate_barrier(struct i915_active *ref,
					    struct intel_engine_cs *engine)
{
	intel_engine_mask_t tmp, mask = engine->mask;
	struct llist_node *first = NULL, *last = NULL;
	struct intel_gt *gt = engine->gt;

	GEM_BUG_ON(i915_active_is_idle(ref));

	/* Wait until the previous preallocation is completed */
	while (!llist_empty(&ref->preallocated_barriers))
		cond_resched();

	/*
	 * Preallocate a node for each physical engine supporting the target
	 * engine (remember virtual engines have more than one sibling).
	 * We can then use the preallocated nodes in
	 * i915_active_acquire_barrier()
	 */
	GEM_BUG_ON(!mask);
	for_each_engine_masked(engine, gt, mask, tmp) {
		u64 idx = engine->kernel_context->timeline->fence_context;
		struct llist_node *prev = first;
		struct active_node *node;

		rcu_read_lock();
		node = reuse_idle_barrier(ref, idx);
		rcu_read_unlock();
		if (!node) {
			node = kmem_cache_alloc(slab_cache, GFP_KERNEL);
			if (!node)
				goto unwind;

			RCU_INIT_POINTER(node->base.fence, NULL);
			node->base.cb.func = node_retire;
			node->timeline = idx;
			node->ref = ref;
		}

		if (!i915_active_fence_isset(&node->base)) {
			/*
			 * Mark this as being *our* unconnected proto-node.
			 *
			 * Since this node is not in any list, and we have
			 * decoupled it from the rbtree, we can reuse the
			 * request to indicate this is an idle-barrier node
			 * and then we can use the rb_node and list pointers
			 * for our tracking of the pending barrier.
			 */
			RCU_INIT_POINTER(node->base.fence, ERR_PTR(-EAGAIN));
			node->base.cb.node.prev = (void *)engine;
			__i915_active_acquire(ref);
		}
		GEM_BUG_ON(rcu_access_pointer(node->base.fence) != ERR_PTR(-EAGAIN));

		GEM_BUG_ON(barrier_to_engine(node) != engine);
		first = barrier_to_ll(node);
		first->next = prev;
		if (!last)
			last = first;
		intel_engine_pm_get(engine);
	}

	GEM_BUG_ON(!llist_empty(&ref->preallocated_barriers));
	llist_add_batch(first, last, &ref->preallocated_barriers);

	return 0;

unwind:
	while (first) {
		struct active_node *node = barrier_from_ll(first);

		first = first->next;

		atomic_dec(&ref->count);
		intel_engine_pm_put(barrier_to_engine(node));

		kmem_cache_free(slab_cache, node);
	}
	return -ENOMEM;
}

void i915_active_acquire_barrier(struct i915_active *ref)
{
	struct llist_node *pos, *next;
	unsigned long flags;

	GEM_BUG_ON(i915_active_is_idle(ref));

	/*
	 * Transfer the list of preallocated barriers into the
	 * i915_active rbtree, but only as proto-nodes. They will be
	 * populated by i915_request_add_active_barriers() to point to the
	 * request that will eventually release them.
	 */
	llist_for_each_safe(pos, next, take_preallocated_barriers(ref)) {
		struct active_node *node = barrier_from_ll(pos);
		struct intel_engine_cs *engine = barrier_to_engine(node);
		struct rb_node **p, *parent;

		spin_lock_irqsave_nested(&ref->tree_lock, flags,
					 SINGLE_DEPTH_NESTING);
		parent = NULL;
		p = &ref->tree.rb_node;
		while (*p) {
			struct active_node *it;

			parent = *p;

			it = rb_entry(parent, struct active_node, node);
			if (it->timeline < node->timeline)
				p = &parent->rb_right;
			else
				p = &parent->rb_left;
		}
		rb_link_node(&node->node, parent, p);
		rb_insert_color(&node->node, &ref->tree);
		spin_unlock_irqrestore(&ref->tree_lock, flags);

		GEM_BUG_ON(!intel_engine_pm_is_awake(engine));
		llist_add(barrier_to_ll(node), &engine->barrier_tasks);
		intel_engine_pm_put_delay(engine, 2);
	}
}

static struct dma_fence **ll_to_fence_slot(struct llist_node *node)
{
	return __active_fence_slot(&barrier_from_ll(node)->base);
}

void i915_request_add_active_barriers(struct i915_request *rq)
{
	struct intel_engine_cs *engine = rq->engine;
	struct llist_node *node, *next;
	unsigned long flags;

	GEM_BUG_ON(!intel_context_is_barrier(rq->context));
	GEM_BUG_ON(intel_engine_is_virtual(engine));
	GEM_BUG_ON(i915_request_timeline(rq) != engine->kernel_context->timeline);

	node = llist_del_all(&engine->barrier_tasks);
	if (!node)
		return;
	/*
	 * Attach the list of proto-fences to the in-flight request such
	 * that the parent i915_active will be released when this request
	 * is retired.
	 */
	spin_lock_irqsave(&rq->lock, flags);
	llist_for_each_safe(node, next, node) {
		/* serialise with reuse_idle_barrier */
		smp_store_mb(*ll_to_fence_slot(node), &rq->fence);
		list_add_tail((struct list_head *)node, &rq->fence.cb_list);
	}
	spin_unlock_irqrestore(&rq->lock, flags);
}

/*
 * __i915_active_fence_set: Update the last active fence along its timeline
 * @active: the active tracker
 * @fence: the new fence (under construction)
 *
 * Records the new @fence as the last active fence along its timeline in
 * this active tracker, moving the tracking callbacks from the previous
 * fence onto this one. Returns the previous fence (if not already completed),
 * which the caller must ensure is executed before the new fence. To ensure
 * that the order of fences within the timeline of the i915_active_fence is
 * understood, it should be locked by the caller.
 */
struct dma_fence *
__i915_active_fence_set(struct i915_active_fence *active,
			struct dma_fence *fence)
{
	struct dma_fence *prev;
	unsigned long flags;

	if (fence == rcu_access_pointer(active->fence))
		return fence;

	GEM_BUG_ON(test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags));

	/*
	 * Consider that we have two threads arriving (A and B), with
	 * C already resident as the active->fence.
	 *
	 * A does the xchg first, and so it sees C or NULL depending
	 * on the timing of the interrupt handler. If it is NULL, the
	 * previous fence must have been signaled and we know that
	 * we are first on the timeline. If it is still present,
	 * we acquire the lock on that fence and serialise with the interrupt
	 * handler, in the process removing it from any future interrupt
	 * callback. A will then wait on C before executing (if present).
	 *
	 * As B is second, it sees A as the previous fence and so waits for
	 * it to complete its transition and takes over the occupancy for
	 * itself -- remembering that it needs to wait on A before executing.
	 *
	 * Note the strong ordering of the timeline also provides consistent
	 * nesting rules for the fence->lock; the inner lock is always the
	 * older lock.
	 */
	spin_lock_irqsave(fence->lock, flags);
	prev = xchg(__active_fence_slot(active), fence);
	if (prev) {
		GEM_BUG_ON(prev == fence);
		spin_lock_nested(prev->lock, SINGLE_DEPTH_NESTING);
		__list_del_entry(&active->cb.node);
		spin_unlock(prev->lock); /* serialise with prev->cb_list */
	}
	list_add_tail(&active->cb.node, &fence->cb_list);
	spin_unlock_irqrestore(fence->lock, flags);

	return prev;
}

int i915_active_fence_set(struct i915_active_fence *active,
			  struct i915_request *rq)
{
	struct dma_fence *fence;
	int err = 0;

	/* Must maintain timeline ordering wrt previous active requests */
	rcu_read_lock();
	fence = __i915_active_fence_set(active, &rq->fence);
	if (fence) /* but the previous fence may not belong to that timeline! */
		fence = dma_fence_get_rcu(fence);
	rcu_read_unlock();
	if (fence) {
		err = i915_request_await_dma_fence(rq, fence);
		dma_fence_put(fence);
	}

	return err;
}

void i915_active_noop(struct dma_fence *fence, struct dma_fence_cb *cb)
{
	active_fence_cb(fence, cb);
}

struct auto_active {
	struct i915_active base;
	struct kref ref;
};

struct i915_active *i915_active_get(struct i915_active *ref)
{
	struct auto_active *aa = container_of(ref, typeof(*aa), base);

	kref_get(&aa->ref);
	return &aa->base;
}

static void auto_release(struct kref *ref)
{
	struct auto_active *aa = container_of(ref, typeof(*aa), ref);

	i915_active_fini(&aa->base);
	kfree(aa);
}

void i915_active_put(struct i915_active *ref)
{
	struct auto_active *aa = container_of(ref, typeof(*aa), base);

	kref_put(&aa->ref, auto_release);
}

static int auto_active(struct i915_active *ref)
{
	i915_active_get(ref);
	return 0;
}

static void auto_retire(struct i915_active *ref)
{
	i915_active_put(ref);
}

struct i915_active *i915_active_create(void)
{
	struct auto_active *aa;

	aa = kmalloc(sizeof(*aa), GFP_KERNEL);
	if (!aa)
		return NULL;

	kref_init(&aa->ref);
	i915_active_init(&aa->base, auto_active, auto_retire, 0);

	return &aa->base;
}

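/*
 * Usage sketch (illustrative only): the self-contained, reference-counted
 * variant built on auto_active/auto_retire above. The tracker is freed once
 * the creator's reference is dropped and all tracked activity has retired.
 *
 *	struct i915_active *ref = i915_active_create();
 *	if (!ref)
 *		return -ENOMEM;
 *
 *	err = i915_active_add_request(ref, rq);
 *	i915_active_put(ref);
 */
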
#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftests/i915_active.c"
#endif

void i915_active_module_exit(void)
{
	kmem_cache_destroy(slab_cache);
}

int __init i915_active_module_init(void)
{
	slab_cache = KMEM_CACHE(active_node, SLAB_HWCACHE_ALIGN);
	if (!slab_cache)
		return -ENOMEM;

	return 0;
}