Commit | Line | Data |
---|---|---|
64d6c500 CW |
1 | /* |
2 | * SPDX-License-Identifier: MIT | |
3 | * | |
4 | * Copyright © 2019 Intel Corporation | |
5 | */ | |
6 | ||
5361db1a CW |
7 | #include <linux/debugobjects.h> |
8 | ||
e6ba7648 | 9 | #include "gt/intel_context.h" |
d13a3177 | 10 | #include "gt/intel_engine_heartbeat.h" |
7009db14 | 11 | #include "gt/intel_engine_pm.h" |
2871ea85 | 12 | #include "gt/intel_ring.h" |
7009db14 | 13 | |
64d6c500 CW |
14 | #include "i915_drv.h" |
15 | #include "i915_active.h" | |
103b76ee | 16 | #include "i915_globals.h" |
64d6c500 | 17 | |
5f5c139d CW |
18 | /* |
19 | * Active refs memory management | |
20 | * | |
21 | * To be more economical with memory, we reap all the i915_active trees as | |
22 | * they idle (when we know the active requests are inactive) and allocate the | |
23 | * nodes from a local slab cache to hopefully reduce the fragmentation. | |
24 | */ | |
25 | static struct i915_global_active { | |
103b76ee | 26 | struct i915_global base; |
5f5c139d CW |
27 | struct kmem_cache *slab_cache; |
28 | } global; |
29 | ||
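In practice the tracker is driven through the entry points defined further down (and wrapped by i915_active.h): the first fence recorded against a timeline allocates an active_node from the slab above, and the nodes are handed back once the tracker retires. A minimal sketch of that lifecycle is shown below; example_active(), example_retire() and example_track() are hypothetical names, not i915 symbols, and the timeline index and fence are caller-supplied stand-ins.

```c
#include <linux/sched.h>	/* TASK_INTERRUPTIBLE */

#include "i915_active.h"

/* Hypothetical acquire/retire hooks -- not real i915 symbols */
static int example_active(struct i915_active *ref) { return 0; }
static void example_retire(struct i915_active *ref) { }

/* Track @fence on timeline @idx, then wait for the tracker to idle */
static int example_track(struct i915_active *ref, u64 idx,
			 struct dma_fence *fence)
{
	int err;

	i915_active_init(ref, example_active, example_retire);

	/* First use of @idx allocates an active_node from global.slab_cache */
	err = i915_active_ref(ref, idx, fence);
	if (!err) {
		/* On the final retire the nodes are reaped back to the slab */
		err = __i915_active_wait(ref, TASK_INTERRUPTIBLE);
	}

	if (i915_active_is_idle(ref))
		i915_active_fini(ref);

	return err;
}
```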
64d6c500 | 30 | struct active_node { |
5d934137 | 31 | struct rb_node node; |
b1e3177b | 32 | struct i915_active_fence base; |
64d6c500 | 33 | struct i915_active *ref; |
64d6c500 CW |
34 | u64 timeline; |
35 | }; | |
36 | ||
5d934137 CW |
37 | #define fetch_node(x) rb_entry(READ_ONCE(x), typeof(struct active_node), node) |
38 | ||
d8af05ff | 39 | static inline struct active_node * |
b1e3177b | 40 | node_from_active(struct i915_active_fence *active) |
d8af05ff CW |
41 | { |
42 | return container_of(active, struct active_node, base); | |
43 | } | |
44 | ||
45 | #define take_preallocated_barriers(x) llist_del_all(&(x)->preallocated_barriers) | |
46 | ||
b1e3177b | 47 | static inline bool is_barrier(const struct i915_active_fence *active) |
d8af05ff | 48 | { |
b1e3177b | 49 | return IS_ERR(rcu_access_pointer(active->fence)); |
d8af05ff CW |
50 | } |
51 | ||
52 | static inline struct llist_node *barrier_to_ll(struct active_node *node) | |
53 | { | |
54 | GEM_BUG_ON(!is_barrier(&node->base)); | |
b1e3177b | 55 | return (struct llist_node *)&node->base.cb.node; |
d8af05ff CW |
56 | } |
57 | ||
f130b712 CW |
58 | static inline struct intel_engine_cs * |
59 | __barrier_to_engine(struct active_node *node) | |
60 | { | |
b1e3177b | 61 | return (struct intel_engine_cs *)READ_ONCE(node->base.cb.node.prev); |
f130b712 CW |
62 | } |
63 | ||
d8af05ff CW |
64 | static inline struct intel_engine_cs * |
65 | barrier_to_engine(struct active_node *node) | |
66 | { | |
67 | GEM_BUG_ON(!is_barrier(&node->base)); | |
f130b712 | 68 | return __barrier_to_engine(node); |
d8af05ff CW |
69 | } |
70 | ||
71 | static inline struct active_node *barrier_from_ll(struct llist_node *x) | |
72 | { | |
73 | return container_of((struct list_head *)x, | |
b1e3177b | 74 | struct active_node, base.cb.node); |
d8af05ff CW |
75 | } |
76 | ||
5361db1a CW |
77 | #if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM) && IS_ENABLED(CONFIG_DEBUG_OBJECTS) |
78 | ||
79 | static void *active_debug_hint(void *addr) | |
80 | { | |
81 | struct i915_active *ref = addr; | |
82 | ||
12c255b5 | 83 | return (void *)ref->active ?: (void *)ref->retire ?: (void *)ref; |
5361db1a CW |
84 | } |
85 | ||
86 | static struct debug_obj_descr active_debug_desc = { | |
87 | .name = "i915_active", | |
88 | .debug_hint = active_debug_hint, | |
89 | }; | |
90 | ||
91 | static void debug_active_init(struct i915_active *ref) | |
92 | { | |
93 | debug_object_init(ref, &active_debug_desc); | |
94 | } | |
95 | ||
96 | static void debug_active_activate(struct i915_active *ref) | |
97 | { | |
bbca083d | 98 | lockdep_assert_held(&ref->tree_lock); |
f52c6d0d CW |
99 | if (!atomic_read(&ref->count)) /* before the first inc */ |
100 | debug_object_activate(ref, &active_debug_desc); | |
5361db1a CW |
101 | } |
102 | ||
103 | static void debug_active_deactivate(struct i915_active *ref) | |
104 | { | |
c9ad602f | 105 | lockdep_assert_held(&ref->tree_lock); |
f52c6d0d CW |
106 | if (!atomic_read(&ref->count)) /* after the last dec */ |
107 | debug_object_deactivate(ref, &active_debug_desc); | |
5361db1a CW |
108 | } |
109 | ||
110 | static void debug_active_fini(struct i915_active *ref) | |
111 | { | |
112 | debug_object_free(ref, &active_debug_desc); | |
113 | } | |
114 | ||
115 | static void debug_active_assert(struct i915_active *ref) | |
116 | { | |
117 | debug_object_assert_init(ref, &active_debug_desc); | |
118 | } | |
119 | ||
120 | #else | |
121 | ||
122 | static inline void debug_active_init(struct i915_active *ref) { } | |
123 | static inline void debug_active_activate(struct i915_active *ref) { } | |
124 | static inline void debug_active_deactivate(struct i915_active *ref) { } | |
125 | static inline void debug_active_fini(struct i915_active *ref) { } | |
126 | static inline void debug_active_assert(struct i915_active *ref) { } | |
127 | ||
128 | #endif | |
129 | ||
a42375af | 130 | static void |
12c255b5 | 131 | __active_retire(struct i915_active *ref) |
a42375af | 132 | { |
99a7f4da | 133 | struct rb_root root = RB_ROOT; |
a42375af | 134 | struct active_node *it, *n; |
c9ad602f | 135 | unsigned long flags; |
12c255b5 | 136 | |
274cbf20 | 137 | GEM_BUG_ON(i915_active_is_idle(ref)); |
12c255b5 CW |
138 | |
139 | /* return the unused nodes to our slabcache -- flushing the allocator */ | |
c9ad602f | 140 | if (!atomic_dec_and_lock_irqsave(&ref->count, &ref->tree_lock, flags)) |
12c255b5 CW |
141 | return; |
142 | ||
b1e3177b | 143 | GEM_BUG_ON(rcu_access_pointer(ref->excl.fence)); |
c9ad602f CW |
144 | debug_active_deactivate(ref); |
145 | ||
99a7f4da CW |
146 | /* Even if we have not used the cache, we may still have a barrier */ |
147 | if (!ref->cache) | |
148 | ref->cache = fetch_node(ref->tree.rb_node); | |
149 | ||
150 | /* Keep the MRU cached node for reuse */ | |
151 | if (ref->cache) { | |
152 | /* Discard all other nodes in the tree */ | |
153 | rb_erase(&ref->cache->node, &ref->tree); | |
154 | root = ref->tree; | |
155 | ||
156 | /* Rebuild the tree with only the cached node */ | |
157 | rb_link_node(&ref->cache->node, NULL, &ref->tree.rb_node); | |
158 | rb_insert_color(&ref->cache->node, &ref->tree); | |
159 | GEM_BUG_ON(ref->tree.rb_node != &ref->cache->node); | |
e28860ae CW |
160 | |
161 | /* Make the cached node available for reuse with any timeline */ | |
162 | if (IS_ENABLED(CONFIG_64BIT)) | |
163 | ref->cache->timeline = 0; /* needs cmpxchg(u64) */ | |
99a7f4da | 164 | } |
c9ad602f CW |
165 | |
166 | spin_unlock_irqrestore(&ref->tree_lock, flags); | |
e1d7b66b CW |
167 | |
168 | /* After the final retire, the entire struct may be freed */ | |
169 | if (ref->retire) | |
170 | ref->retire(ref); | |
b1e3177b CW |
171 | |
172 | /* ... except if you wait on it, you must manage your own references! */ | |
173 | wake_up_var(ref); | |
c9ad602f | 174 | |
99a7f4da | 175 | /* Finally free the discarded timeline tree */ |
c9ad602f CW |
176 | rbtree_postorder_for_each_entry_safe(it, n, &root, node) { |
177 | GEM_BUG_ON(i915_active_fence_isset(&it->base)); | |
178 | kmem_cache_free(global.slab_cache, it); | |
179 | } | |
a42375af CW |
180 | } |
181 | ||
274cbf20 CW |
182 | static void |
183 | active_work(struct work_struct *wrk) | |
184 | { | |
185 | struct i915_active *ref = container_of(wrk, typeof(*ref), work); | |
186 | ||
187 | GEM_BUG_ON(!atomic_read(&ref->count)); | |
188 | if (atomic_add_unless(&ref->count, -1, 1)) | |
189 | return; | |
190 | ||
274cbf20 CW |
191 | __active_retire(ref); |
192 | } | |
193 | ||
64d6c500 | 194 | static void |
12c255b5 | 195 | active_retire(struct i915_active *ref) |
64d6c500 | 196 | { |
12c255b5 CW |
197 | GEM_BUG_ON(!atomic_read(&ref->count)); |
198 | if (atomic_add_unless(&ref->count, -1, 1)) | |
a42375af CW |
199 | return; |
200 | ||
c9ad602f | 201 | if (ref->flags & I915_ACTIVE_RETIRE_SLEEPS) { |
274cbf20 CW |
202 | queue_work(system_unbound_wq, &ref->work); |
203 | return; | |
204 | } | |
205 | ||
12c255b5 | 206 | __active_retire(ref); |
64d6c500 CW |
207 | } |
208 | ||
df9f85d8 CW |
209 | static inline struct dma_fence ** |
210 | __active_fence_slot(struct i915_active_fence *active) | |
211 | { | |
212 | return (struct dma_fence ** __force)&active->fence; | |
213 | } | |
214 | ||
215 | static inline bool | |
216 | active_fence_cb(struct dma_fence *fence, struct dma_fence_cb *cb) | |
217 | { | |
218 | struct i915_active_fence *active = | |
219 | container_of(cb, typeof(*active), cb); | |
220 | ||
221 | return cmpxchg(__active_fence_slot(active), fence, NULL) == fence; | |
222 | } | |
223 | ||
64d6c500 | 224 | static void |
b1e3177b | 225 | node_retire(struct dma_fence *fence, struct dma_fence_cb *cb) |
64d6c500 | 226 | { |
df9f85d8 CW |
227 | if (active_fence_cb(fence, cb)) |
228 | active_retire(container_of(cb, struct active_node, base.cb)->ref); | |
64d6c500 CW |
229 | } |
230 | ||
b1e3177b CW |
231 | static void |
232 | excl_retire(struct dma_fence *fence, struct dma_fence_cb *cb) | |
233 | { | |
df9f85d8 CW |
234 | if (active_fence_cb(fence, cb)) |
235 | active_retire(container_of(cb, struct i915_active, excl.cb)); | |
b1e3177b CW |
236 | } |
237 | ||
5d934137 | 238 | static struct active_node *__active_lookup(struct i915_active *ref, u64 idx) |
64d6c500 | 239 | { |
5d934137 | 240 | struct active_node *it; |
64d6c500 | 241 | |
e28860ae CW |
242 | GEM_BUG_ON(idx == 0); /* 0 is the unordered timeline, rsvd for cache */ |
243 | ||
64d6c500 CW |
244 | /* |
245 | * We track the most recently used timeline to skip a rbtree search | |
246 | * for the common case, under typical loads we never need the rbtree | |
247 | * at all. We can reuse the last slot if it is empty, that is | |
248 | * after the previous activity has been retired, or if it matches the | |
249 | * current timeline. | |
64d6c500 | 250 | */ |
5d934137 | 251 | it = READ_ONCE(ref->cache); |
e28860ae CW |
252 | if (it) { |
253 | u64 cached = READ_ONCE(it->timeline); | |
254 | ||
255 | /* Once claimed, this slot will only belong to this idx */ | |
256 | if (cached == idx) | |
257 | return it; | |
258 | ||
259 | #ifdef CONFIG_64BIT /* for cmpxchg(u64) */ | |
260 | /* | |
261 | * An unclaimed cache [.timeline=0] can only be claimed once. | |
262 | * | |
263 | * If the value is already non-zero, some other thread has | |
264 | * claimed the cache and we know that it does not match our |
265 | * idx. If, and only if, the timeline is currently zero is it | |
266 | * worth competing to claim it atomically for ourselves (for | |
267 | * only the winner of that race will cmpxchg return the old | |
268 | * value of 0). | |
269 | */ | |
270 | if (!cached && !cmpxchg(&it->timeline, 0, idx)) | |
271 | return it; | |
272 | #endif | |
273 | } | |
5d934137 CW |
274 | |
275 | BUILD_BUG_ON(offsetof(typeof(*it), node)); | |
276 | ||
277 | /* While active, the tree can only be built; not destroyed */ | |
278 | GEM_BUG_ON(i915_active_is_idle(ref)); | |
279 | ||
280 | it = fetch_node(ref->tree.rb_node); | |
281 | while (it) { | |
282 | if (it->timeline < idx) { | |
283 | it = fetch_node(it->node.rb_right); | |
284 | } else if (it->timeline > idx) { | |
285 | it = fetch_node(it->node.rb_left); | |
286 | } else { | |
287 | WRITE_ONCE(ref->cache, it); | |
288 | break; | |
289 | } | |
290 | } | |
291 | ||
292 | /* NB: If the tree rotated beneath us, we may miss our target. */ | |
293 | return it; | |
294 | } | |
295 | ||
296 | static struct i915_active_fence * | |
297 | active_instance(struct i915_active *ref, u64 idx) | |
298 | { | |
299 | struct active_node *node, *prealloc; | |
300 | struct rb_node **p, *parent; | |
301 | ||
302 | node = __active_lookup(ref, idx); | |
303 | if (likely(node)) | |
12c255b5 CW |
304 | return &node->base; |
305 | ||
306 | /* Preallocate a replacement, just in case */ | |
307 | prealloc = kmem_cache_alloc(global.slab_cache, GFP_KERNEL); | |
308 | if (!prealloc) | |
309 | return NULL; | |
64d6c500 | 310 | |
c9ad602f | 311 | spin_lock_irq(&ref->tree_lock); |
12c255b5 | 312 | GEM_BUG_ON(i915_active_is_idle(ref)); |
64d6c500 CW |
313 | |
314 | parent = NULL; | |
315 | p = &ref->tree.rb_node; | |
316 | while (*p) { | |
317 | parent = *p; | |
318 | ||
319 | node = rb_entry(parent, struct active_node, node); | |
12c255b5 CW |
320 | if (node->timeline == idx) { |
321 | kmem_cache_free(global.slab_cache, prealloc); | |
322 | goto out; | |
323 | } | |
64d6c500 CW |
324 | |
325 | if (node->timeline < idx) | |
326 | p = &parent->rb_right; | |
327 | else | |
328 | p = &parent->rb_left; | |
329 | } | |
330 | ||
12c255b5 | 331 | node = prealloc; |
df9f85d8 | 332 | __i915_active_fence_init(&node->base, NULL, node_retire); |
64d6c500 CW |
333 | node->ref = ref; |
334 | node->timeline = idx; | |
335 | ||
336 | rb_link_node(&node->node, parent, p); | |
337 | rb_insert_color(&node->node, &ref->tree); | |
338 | ||
64d6c500 | 339 | out: |
5d934137 | 340 | WRITE_ONCE(ref->cache, node); |
c9ad602f | 341 | spin_unlock_irq(&ref->tree_lock); |
12c255b5 CW |
342 | |
343 | return &node->base; | |
64d6c500 CW |
344 | } |
345 | ||
b1e3177b | 346 | void __i915_active_init(struct i915_active *ref, |
12c255b5 CW |
347 | int (*active)(struct i915_active *ref), |
348 | void (*retire)(struct i915_active *ref), | |
ae303004 CW |
349 | struct lock_class_key *mkey, |
350 | struct lock_class_key *wkey) | |
64d6c500 | 351 | { |
274cbf20 CW |
352 | unsigned long bits; |
353 | ||
5361db1a CW |
354 | debug_active_init(ref); |
355 | ||
79c7a28e | 356 | ref->flags = 0; |
12c255b5 | 357 | ref->active = active; |
274cbf20 CW |
358 | ref->retire = ptr_unpack_bits(retire, &bits, 2); |
359 | if (bits & I915_ACTIVE_MAY_SLEEP) | |
360 | ref->flags |= I915_ACTIVE_RETIRE_SLEEPS; | |
2850748e | 361 | |
c9ad602f | 362 | spin_lock_init(&ref->tree_lock); |
64d6c500 | 363 | ref->tree = RB_ROOT; |
12c255b5 | 364 | ref->cache = NULL; |
c9ad602f | 365 | |
d8af05ff | 366 | init_llist_head(&ref->preallocated_barriers); |
12c255b5 | 367 | atomic_set(&ref->count, 0); |
ae303004 | 368 | __mutex_init(&ref->mutex, "i915_active", mkey); |
df9f85d8 | 369 | __i915_active_fence_init(&ref->excl, NULL, excl_retire); |
274cbf20 | 370 | INIT_WORK(&ref->work, active_work); |
ae303004 CW |
371 | #if IS_ENABLED(CONFIG_LOCKDEP) |
372 | lockdep_init_map(&ref->work.lockdep_map, "i915_active.work", wkey, 0); | |
373 | #endif | |
64d6c500 CW |
374 | } |
375 | ||
f130b712 CW |
376 | static bool ____active_del_barrier(struct i915_active *ref, |
377 | struct active_node *node, | |
378 | struct intel_engine_cs *engine) | |
379 | ||
d8af05ff | 380 | { |
d8af05ff CW |
381 | struct llist_node *head = NULL, *tail = NULL; |
382 | struct llist_node *pos, *next; | |
383 | ||
75d0a7f3 | 384 | GEM_BUG_ON(node->timeline != engine->kernel_context->timeline->fence_context); |
d8af05ff CW |
385 | |
386 | /* | |
387 | * Rebuild the llist excluding our node. We may perform this | |
388 | * outside of the kernel_context timeline mutex and so someone | |
389 | * else may be manipulating the engine->barrier_tasks, in | |
390 | * which case either we or they will be upset :) | |
391 | * | |
392 | * A second __active_del_barrier() will report failure to claim | |
393 | * the active_node and the caller will just shrug and know not to | |
394 | * claim ownership of its node. | |
395 | * | |
396 | * A concurrent i915_request_add_active_barriers() will miss adding | |
397 | * any of the tasks, but we will try again on the next -- and since | |
398 | * we are actively using the barrier, we know that there will be | |
399 | * at least another opportunity when we idle. | |
400 | */ | |
401 | llist_for_each_safe(pos, next, llist_del_all(&engine->barrier_tasks)) { | |
402 | if (node == barrier_from_ll(pos)) { | |
403 | node = NULL; | |
404 | continue; | |
405 | } | |
406 | ||
407 | pos->next = head; | |
408 | head = pos; | |
409 | if (!tail) | |
410 | tail = pos; | |
411 | } | |
412 | if (head) | |
413 | llist_add_batch(head, tail, &engine->barrier_tasks); | |
414 | ||
415 | return !node; | |
416 | } | |
417 | ||
f130b712 CW |
418 | static bool |
419 | __active_del_barrier(struct i915_active *ref, struct active_node *node) | |
420 | { | |
421 | return ____active_del_barrier(ref, node, barrier_to_engine(node)); | |
422 | } | |
423 | ||
5d934137 CW |
424 | static bool |
425 | replace_barrier(struct i915_active *ref, struct i915_active_fence *active) | |
426 | { | |
427 | if (!is_barrier(active)) /* proto-node used by our idle barrier? */ | |
428 | return false; | |
429 | ||
430 | /* | |
431 | * This request is on the kernel_context timeline, and so | |
432 | * we can use it to substitute for the pending idle-barrier |
433 | * request that we want to emit on the kernel_context. | |
434 | */ | |
435 | __active_del_barrier(ref, node_from_active(active)); | |
436 | return true; | |
437 | } | |
438 | ||
439 | int i915_active_ref(struct i915_active *ref, u64 idx, struct dma_fence *fence) | |
64d6c500 | 440 | { |
b1e3177b | 441 | struct i915_active_fence *active; |
12c255b5 | 442 | int err; |
312c4ba1 CW |
443 | |
444 | /* Prevent reaping in case we malloc/wait while building the tree */ | |
12c255b5 CW |
445 | err = i915_active_acquire(ref); |
446 | if (err) | |
447 | return err; | |
64d6c500 | 448 | |
5d934137 | 449 | active = active_instance(ref, idx); |
12c255b5 CW |
450 | if (!active) { |
451 | err = -ENOMEM; | |
312c4ba1 CW |
452 | goto out; |
453 | } | |
64d6c500 | 454 | |
5d934137 | 455 | if (replace_barrier(ref, active)) { |
b1e3177b CW |
456 | RCU_INIT_POINTER(active->fence, NULL); |
457 | atomic_dec(&ref->count); | |
d8af05ff | 458 | } |
b1e3177b | 459 | if (!__i915_active_fence_set(active, fence)) |
5d934137 | 460 | __i915_active_acquire(ref); |
64d6c500 | 461 | |
312c4ba1 CW |
462 | out: |
463 | i915_active_release(ref); | |
464 | return err; | |
64d6c500 CW |
465 | } |
466 | ||
5d934137 CW |
467 | static struct dma_fence * |
468 | __i915_active_set_fence(struct i915_active *ref, | |
469 | struct i915_active_fence *active, | |
470 | struct dma_fence *fence) | |
2850748e | 471 | { |
e3793468 CW |
472 | struct dma_fence *prev; |
473 | ||
5d934137 CW |
474 | if (replace_barrier(ref, active)) { |
475 | RCU_INIT_POINTER(active->fence, fence); | |
476 | return NULL; | |
477 | } | |
2850748e | 478 | |
30ca04e1 | 479 | rcu_read_lock(); |
5d934137 | 480 | prev = __i915_active_fence_set(active, fence); |
30ca04e1 CW |
481 | if (prev) |
482 | prev = dma_fence_get_rcu(prev); | |
483 | else | |
5d934137 | 484 | __i915_active_acquire(ref); |
30ca04e1 | 485 | rcu_read_unlock(); |
e3793468 CW |
486 | |
487 | return prev; | |
b1e3177b | 488 | } |
2850748e | 489 | |
5d934137 CW |
490 | static struct i915_active_fence * |
491 | __active_fence(struct i915_active *ref, u64 idx) | |
492 | { | |
493 | struct active_node *it; | |
494 | ||
495 | it = __active_lookup(ref, idx); | |
496 | if (unlikely(!it)) { /* Contention with parallel tree builders! */ | |
497 | spin_lock_irq(&ref->tree_lock); | |
498 | it = __active_lookup(ref, idx); | |
499 | spin_unlock_irq(&ref->tree_lock); | |
500 | } | |
501 | GEM_BUG_ON(!it); /* slot must be preallocated */ | |
502 | ||
503 | return &it->base; | |
504 | } | |
505 | ||
506 | struct dma_fence * | |
507 | __i915_active_ref(struct i915_active *ref, u64 idx, struct dma_fence *fence) | |
508 | { | |
509 | /* Only valid while active, see i915_active_acquire_for_context() */ | |
510 | return __i915_active_set_fence(ref, __active_fence(ref, idx), fence); | |
511 | } | |
512 | ||
513 | struct dma_fence * | |
514 | i915_active_set_exclusive(struct i915_active *ref, struct dma_fence *f) | |
515 | { | |
516 | /* We expect the caller to manage the exclusive timeline ordering */ | |
517 | return __i915_active_set_fence(ref, &ref->excl, f); | |
518 | } | |
519 | ||
b1e3177b CW |
520 | bool i915_active_acquire_if_busy(struct i915_active *ref) |
521 | { | |
522 | debug_active_assert(ref); | |
523 | return atomic_add_unless(&ref->count, 1, 0); | |
2850748e CW |
524 | } |
525 | ||
04240e30 CW |
526 | static void __i915_active_activate(struct i915_active *ref) |
527 | { | |
528 | spin_lock_irq(&ref->tree_lock); /* __active_retire() */ | |
529 | if (!atomic_fetch_inc(&ref->count)) | |
530 | debug_active_activate(ref); | |
531 | spin_unlock_irq(&ref->tree_lock); | |
532 | } | |
533 | ||
12c255b5 | 534 | int i915_active_acquire(struct i915_active *ref) |
64d6c500 | 535 | { |
12c255b5 CW |
536 | int err; |
537 | ||
b1e3177b | 538 | if (i915_active_acquire_if_busy(ref)) |
12c255b5 | 539 | return 0; |
5361db1a | 540 | |
04240e30 CW |
541 | if (!ref->active) { |
542 | __i915_active_activate(ref); | |
543 | return 0; | |
544 | } | |
545 | ||
12c255b5 CW |
546 | err = mutex_lock_interruptible(&ref->mutex); |
547 | if (err) | |
548 | return err; | |
5361db1a | 549 | |
ac0e331a | 550 | if (likely(!i915_active_acquire_if_busy(ref))) { |
04240e30 CW |
551 | err = ref->active(ref); |
552 | if (!err) | |
553 | __i915_active_activate(ref); | |
12c255b5 CW |
554 | } |
555 | ||
556 | mutex_unlock(&ref->mutex); | |
557 | ||
558 | return err; | |
64d6c500 CW |
559 | } |
560 | ||
5d934137 CW |
561 | int i915_active_acquire_for_context(struct i915_active *ref, u64 idx) |
562 | { | |
563 | struct i915_active_fence *active; | |
564 | int err; | |
565 | ||
566 | err = i915_active_acquire(ref); | |
567 | if (err) | |
568 | return err; | |
569 | ||
570 | active = active_instance(ref, idx); | |
571 | if (!active) { | |
572 | i915_active_release(ref); | |
573 | return -ENOMEM; | |
574 | } | |
575 | ||
576 | return 0; /* return with active ref */ | |
577 | } | |
578 | ||
64d6c500 CW |
579 | void i915_active_release(struct i915_active *ref) |
580 | { | |
5361db1a | 581 | debug_active_assert(ref); |
12c255b5 | 582 | active_retire(ref); |
64d6c500 CW |
583 | } |
584 | ||
b1e3177b | 585 | static void enable_signaling(struct i915_active_fence *active) |
79c7a28e | 586 | { |
b1e3177b | 587 | struct dma_fence *fence; |
79c7a28e | 588 | |
c0e31018 CW |
589 | if (unlikely(is_barrier(active))) |
590 | return; | |
591 | ||
b1e3177b CW |
592 | fence = i915_active_fence_get(active); |
593 | if (!fence) | |
594 | return; | |
2850748e | 595 | |
b1e3177b CW |
596 | dma_fence_enable_sw_signaling(fence); |
597 | dma_fence_put(fence); | |
2850748e CW |
598 | } |
599 | ||
d13a3177 | 600 | static int flush_barrier(struct active_node *it) |
64d6c500 | 601 | { |
d13a3177 | 602 | struct intel_engine_cs *engine; |
64d6c500 | 603 | |
d13a3177 CW |
604 | if (likely(!is_barrier(&it->base))) |
605 | return 0; | |
12c255b5 | 606 | |
d13a3177 CW |
607 | engine = __barrier_to_engine(it); |
608 | smp_rmb(); /* serialise with add_active_barriers */ | |
609 | if (!is_barrier(&it->base)) | |
12c255b5 | 610 | return 0; |
2850748e | 611 | |
d13a3177 CW |
612 | return intel_engine_flush_barriers(engine); |
613 | } | |
614 | ||
615 | static int flush_lazy_signals(struct i915_active *ref) | |
616 | { | |
617 | struct active_node *it, *n; | |
618 | int err = 0; | |
619 | ||
b1e3177b | 620 | enable_signaling(&ref->excl); |
64d6c500 | 621 | rbtree_postorder_for_each_entry_safe(it, n, &ref->tree, node) { |
d13a3177 CW |
622 | err = flush_barrier(it); /* unconnected idle barrier? */ |
623 | if (err) | |
624 | break; | |
d8af05ff | 625 | |
b1e3177b | 626 | enable_signaling(&it->base); |
64d6c500 CW |
627 | } |
628 | ||
d13a3177 CW |
629 | return err; |
630 | } | |
631 | ||
d75a92a8 | 632 | int __i915_active_wait(struct i915_active *ref, int state) |
d13a3177 CW |
633 | { |
634 | int err; | |
635 | ||
636 | might_sleep(); | |
637 | ||
638 | if (!i915_active_acquire_if_busy(ref)) | |
639 | return 0; | |
640 | ||
641 | /* Any fence added after the wait begins will not be auto-signaled */ | |
642 | err = flush_lazy_signals(ref); | |
b1e3177b | 643 | i915_active_release(ref); |
afd1bcd4 CW |
644 | if (err) |
645 | return err; | |
646 | ||
d75a92a8 CW |
647 | if (!i915_active_is_idle(ref) && |
648 | ___wait_var_event(ref, i915_active_is_idle(ref), | |
649 | state, 0, 0, schedule())) | |
79c7a28e CW |
650 | return -EINTR; |
651 | ||
e1cda6a5 | 652 | flush_work(&ref->work); |
afd1bcd4 | 653 | return 0; |
64d6c500 CW |
654 | } |
655 | ||
29e6ecf3 CW |
656 | static int __await_active(struct i915_active_fence *active, |
657 | int (*fn)(void *arg, struct dma_fence *fence), | |
658 | void *arg) | |
659 | { | |
660 | struct dma_fence *fence; | |
661 | ||
662 | if (is_barrier(active)) /* XXX flush the barrier? */ | |
663 | return 0; | |
664 | ||
665 | fence = i915_active_fence_get(active); | |
666 | if (fence) { | |
667 | int err; | |
668 | ||
669 | err = fn(arg, fence); | |
670 | dma_fence_put(fence); | |
671 | if (err < 0) | |
672 | return err; | |
673 | } | |
674 | ||
675 | return 0; | |
676 | } | |
677 | ||
3b0a0579 CW |
678 | struct wait_barrier { |
679 | struct wait_queue_entry base; | |
680 | struct i915_active *ref; | |
681 | }; | |
682 | ||
683 | static int | |
684 | barrier_wake(wait_queue_entry_t *wq, unsigned int mode, int flags, void *key) | |
685 | { | |
686 | struct wait_barrier *wb = container_of(wq, typeof(*wb), base); | |
687 | ||
688 | if (i915_active_is_idle(wb->ref)) { | |
689 | list_del(&wq->entry); | |
690 | i915_sw_fence_complete(wq->private); | |
691 | kfree(wq); | |
692 | } | |
693 | ||
694 | return 0; | |
695 | } | |
696 | ||
697 | static int __await_barrier(struct i915_active *ref, struct i915_sw_fence *fence) | |
698 | { | |
699 | struct wait_barrier *wb; | |
700 | ||
701 | wb = kmalloc(sizeof(*wb), GFP_KERNEL); | |
702 | if (unlikely(!wb)) | |
703 | return -ENOMEM; | |
704 | ||
705 | GEM_BUG_ON(i915_active_is_idle(ref)); | |
706 | if (!i915_sw_fence_await(fence)) { | |
707 | kfree(wb); | |
708 | return -EINVAL; | |
709 | } | |
710 | ||
711 | wb->base.flags = 0; | |
712 | wb->base.func = barrier_wake; | |
713 | wb->base.private = fence; | |
714 | wb->ref = ref; | |
715 | ||
716 | add_wait_queue(__var_waitqueue(ref), &wb->base); | |
717 | return 0; | |
718 | } | |
719 | ||
29e6ecf3 CW |
720 | static int await_active(struct i915_active *ref, |
721 | unsigned int flags, | |
722 | int (*fn)(void *arg, struct dma_fence *fence), | |
3b0a0579 | 723 | void *arg, struct i915_sw_fence *barrier) |
64d6c500 | 724 | { |
2850748e | 725 | int err = 0; |
64d6c500 | 726 | |
3b0a0579 CW |
727 | if (!i915_active_acquire_if_busy(ref)) |
728 | return 0; | |
729 | ||
442dbc5c CW |
730 | if (flags & I915_ACTIVE_AWAIT_EXCL && |
731 | rcu_access_pointer(ref->excl.fence)) { | |
29e6ecf3 CW |
732 | err = __await_active(&ref->excl, fn, arg); |
733 | if (err) | |
3b0a0579 | 734 | goto out; |
29e6ecf3 CW |
735 | } |
736 | ||
3b0a0579 | 737 | if (flags & I915_ACTIVE_AWAIT_ACTIVE) { |
29e6ecf3 CW |
738 | struct active_node *it, *n; |
739 | ||
740 | rbtree_postorder_for_each_entry_safe(it, n, &ref->tree, node) { | |
741 | err = __await_active(&it->base, fn, arg); | |
742 | if (err) | |
3b0a0579 | 743 | goto out; |
2850748e | 744 | } |
3b0a0579 CW |
745 | } |
746 | ||
747 | if (flags & I915_ACTIVE_AWAIT_BARRIER) { | |
748 | err = flush_lazy_signals(ref); | |
29e6ecf3 | 749 | if (err) |
3b0a0579 CW |
750 | goto out; |
751 | ||
752 | err = __await_barrier(ref, barrier); | |
753 | if (err) | |
754 | goto out; | |
64d6c500 CW |
755 | } |
756 | ||
3b0a0579 CW |
757 | out: |
758 | i915_active_release(ref); | |
759 | return err; | |
29e6ecf3 CW |
760 | } |
761 | ||
762 | static int rq_await_fence(void *arg, struct dma_fence *fence) | |
763 | { | |
764 | return i915_request_await_dma_fence(arg, fence); | |
765 | } | |
2850748e | 766 | |
29e6ecf3 CW |
767 | int i915_request_await_active(struct i915_request *rq, |
768 | struct i915_active *ref, | |
769 | unsigned int flags) | |
770 | { | |
3b0a0579 | 771 | return await_active(ref, flags, rq_await_fence, rq, &rq->submit); |
29e6ecf3 CW |
772 | } |
773 | ||
774 | static int sw_await_fence(void *arg, struct dma_fence *fence) | |
775 | { | |
776 | return i915_sw_fence_await_dma_fence(arg, fence, 0, | |
777 | GFP_NOWAIT | __GFP_NOWARN); | |
778 | } | |
779 | ||
780 | int i915_sw_fence_await_active(struct i915_sw_fence *fence, | |
781 | struct i915_active *ref, | |
782 | unsigned int flags) | |
783 | { | |
3b0a0579 | 784 | return await_active(ref, flags, sw_await_fence, fence, fence); |
64d6c500 CW |
785 | } |
786 | ||
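Both wrappers feed await_active() with a different completion callback; a caller chooses which classes of tracked fence to order against with the I915_ACTIVE_AWAIT_* flags (I915_ACTIVE_AWAIT_BARRIER additionally flushes the idle barriers via __await_barrier()). A minimal sketch, with example_await_all() as a hypothetical caller:

```c
/* Hypothetical helper: order @rq after everything @ref currently tracks */
static int example_await_all(struct i915_request *rq, struct i915_active *ref)
{
	return i915_request_await_active(rq, ref,
					 I915_ACTIVE_AWAIT_EXCL |
					 I915_ACTIVE_AWAIT_ACTIVE);
}
```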
787 | void i915_active_fini(struct i915_active *ref) | |
788 | { | |
5361db1a | 789 | debug_active_fini(ref); |
12c255b5 | 790 | GEM_BUG_ON(atomic_read(&ref->count)); |
274cbf20 | 791 | GEM_BUG_ON(work_pending(&ref->work)); |
12c255b5 | 792 | mutex_destroy(&ref->mutex); |
99a7f4da CW |
793 | |
794 | if (ref->cache) | |
795 | kmem_cache_free(global.slab_cache, ref->cache); | |
64d6c500 CW |
796 | } |
797 | ||
d8af05ff CW |
798 | static inline bool is_idle_barrier(struct active_node *node, u64 idx) |
799 | { | |
b1e3177b | 800 | return node->timeline == idx && !i915_active_fence_isset(&node->base); |
d8af05ff CW |
801 | } |
802 | ||
803 | static struct active_node *reuse_idle_barrier(struct i915_active *ref, u64 idx) | |
804 | { | |
805 | struct rb_node *prev, *p; | |
806 | ||
807 | if (RB_EMPTY_ROOT(&ref->tree)) | |
808 | return NULL; | |
809 | ||
c9ad602f | 810 | spin_lock_irq(&ref->tree_lock); |
d8af05ff CW |
811 | GEM_BUG_ON(i915_active_is_idle(ref)); |
812 | ||
813 | /* | |
814 | * Try to reuse any existing barrier nodes already allocated for this | |
815 | * i915_active, due to overlapping active phases there is likely a | |
816 | * node kept alive (as we reuse before parking). We prefer to reuse | |
817 | * completely idle barriers (less hassle in manipulating the llists), | |
818 | * but otherwise any will do. | |
819 | */ | |
820 | if (ref->cache && is_idle_barrier(ref->cache, idx)) { | |
821 | p = &ref->cache->node; | |
822 | goto match; | |
823 | } | |
824 | ||
825 | prev = NULL; | |
826 | p = ref->tree.rb_node; | |
827 | while (p) { | |
828 | struct active_node *node = | |
829 | rb_entry(p, struct active_node, node); | |
830 | ||
831 | if (is_idle_barrier(node, idx)) | |
832 | goto match; | |
833 | ||
834 | prev = p; | |
835 | if (node->timeline < idx) | |
836 | p = p->rb_right; | |
837 | else | |
838 | p = p->rb_left; | |
839 | } | |
840 | ||
841 | /* | |
842 | * No quick match, but we did find the leftmost rb_node for the | |
843 | * kernel_context. Walk the rb_tree in-order to see if there were | |
844 | * any idle-barriers on this timeline that we missed, or just use | |
845 | * the first pending barrier. | |
846 | */ | |
847 | for (p = prev; p; p = rb_next(p)) { | |
848 | struct active_node *node = | |
849 | rb_entry(p, struct active_node, node); | |
f130b712 | 850 | struct intel_engine_cs *engine; |
d8af05ff CW |
851 | |
852 | if (node->timeline > idx) | |
853 | break; | |
854 | ||
855 | if (node->timeline < idx) | |
856 | continue; | |
857 | ||
858 | if (is_idle_barrier(node, idx)) | |
859 | goto match; | |
860 | ||
861 | /* | |
862 | * The list of pending barriers is protected by the | |
863 | * kernel_context timeline, which notably we do not hold | |
864 | * here. i915_request_add_active_barriers() may consume | |
865 | * the barrier before we claim it, so we have to check | |
866 | * for success. | |
867 | */ | |
f130b712 CW |
868 | engine = __barrier_to_engine(node); |
869 | smp_rmb(); /* serialise with add_active_barriers */ | |
870 | if (is_barrier(&node->base) && | |
871 | ____active_del_barrier(ref, node, engine)) | |
d8af05ff CW |
872 | goto match; |
873 | } | |
874 | ||
c9ad602f | 875 | spin_unlock_irq(&ref->tree_lock); |
d8af05ff CW |
876 | |
877 | return NULL; | |
878 | ||
879 | match: | |
880 | rb_erase(p, &ref->tree); /* Hide from waits and sibling allocations */ | |
881 | if (p == &ref->cache->node) | |
5d934137 | 882 | WRITE_ONCE(ref->cache, NULL); |
c9ad602f | 883 | spin_unlock_irq(&ref->tree_lock); |
d8af05ff CW |
884 | |
885 | return rb_entry(p, struct active_node, node); | |
886 | } | |
887 | ||
ce476c80 CW |
888 | int i915_active_acquire_preallocate_barrier(struct i915_active *ref, |
889 | struct intel_engine_cs *engine) | |
890 | { | |
3f99a614 | 891 | intel_engine_mask_t tmp, mask = engine->mask; |
d4c3c0b8 | 892 | struct llist_node *first = NULL, *last = NULL; |
a50134b1 | 893 | struct intel_gt *gt = engine->gt; |
ce476c80 | 894 | |
b5e8e954 | 895 | GEM_BUG_ON(i915_active_is_idle(ref)); |
84135022 CW |
896 | |
897 | /* Wait until the previous preallocation is completed */ | |
898 | while (!llist_empty(&ref->preallocated_barriers)) | |
899 | cond_resched(); | |
d8af05ff CW |
900 | |
901 | /* | |
902 | * Preallocate a node for each physical engine supporting the target | |
903 | * engine (remember virtual engines have more than one sibling). | |
904 | * We can then use the preallocated nodes in | |
905 | * i915_active_acquire_barrier() | |
906 | */ | |
416d3838 | 907 | GEM_BUG_ON(!mask); |
a50134b1 | 908 | for_each_engine_masked(engine, gt, mask, tmp) { |
75d0a7f3 | 909 | u64 idx = engine->kernel_context->timeline->fence_context; |
d4c3c0b8 | 910 | struct llist_node *prev = first; |
ce476c80 CW |
911 | struct active_node *node; |
912 | ||
d8af05ff CW |
913 | node = reuse_idle_barrier(ref, idx); |
914 | if (!node) { | |
915 | node = kmem_cache_alloc(global.slab_cache, GFP_KERNEL); | |
e714977e | 916 | if (!node) |
d8af05ff | 917 | goto unwind; |
d8af05ff | 918 | |
b1e3177b CW |
919 | RCU_INIT_POINTER(node->base.fence, NULL); |
920 | node->base.cb.func = node_retire; | |
d8af05ff CW |
921 | node->timeline = idx; |
922 | node->ref = ref; | |
ce476c80 CW |
923 | } |
924 | ||
b1e3177b | 925 | if (!i915_active_fence_isset(&node->base)) { |
d8af05ff CW |
926 | /* |
927 | * Mark this as being *our* unconnected proto-node. | |
928 | * | |
929 | * Since this node is not in any list, and we have | |
930 | * decoupled it from the rbtree, we can reuse the | |
931 | * request to indicate this is an idle-barrier node | |
932 | * and then we can use the rb_node and list pointers | |
933 | * for our tracking of the pending barrier. | |
934 | */ | |
b1e3177b CW |
935 | RCU_INIT_POINTER(node->base.fence, ERR_PTR(-EAGAIN)); |
936 | node->base.cb.node.prev = (void *)engine; | |
5d934137 | 937 | __i915_active_acquire(ref); |
d8af05ff | 938 | } |
df9f85d8 | 939 | GEM_BUG_ON(rcu_access_pointer(node->base.fence) != ERR_PTR(-EAGAIN)); |
ce476c80 | 940 | |
d8af05ff | 941 | GEM_BUG_ON(barrier_to_engine(node) != engine); |
d4c3c0b8 JRS |
942 | first = barrier_to_ll(node); |
943 | first->next = prev; | |
944 | if (!last) | |
945 | last = first; | |
7009db14 | 946 | intel_engine_pm_get(engine); |
ce476c80 CW |
947 | } |
948 | ||
84135022 | 949 | GEM_BUG_ON(!llist_empty(&ref->preallocated_barriers)); |
d4c3c0b8 | 950 | llist_add_batch(first, last, &ref->preallocated_barriers); |
84135022 | 951 | |
7009db14 CW |
952 | return 0; |
953 | ||
954 | unwind: | |
d4c3c0b8 JRS |
955 | while (first) { |
956 | struct active_node *node = barrier_from_ll(first); | |
7009db14 | 957 | |
d4c3c0b8 | 958 | first = first->next; |
84135022 | 959 | |
d8af05ff CW |
960 | atomic_dec(&ref->count); |
961 | intel_engine_pm_put(barrier_to_engine(node)); | |
7009db14 | 962 | |
7009db14 CW |
963 | kmem_cache_free(global.slab_cache, node); |
964 | } | |
e714977e | 965 | return -ENOMEM; |
ce476c80 CW |
966 | } |
967 | ||
968 | void i915_active_acquire_barrier(struct i915_active *ref) | |
969 | { | |
970 | struct llist_node *pos, *next; | |
c9ad602f | 971 | unsigned long flags; |
ce476c80 | 972 | |
12c255b5 | 973 | GEM_BUG_ON(i915_active_is_idle(ref)); |
ce476c80 | 974 | |
d8af05ff CW |
975 | /* |
976 | * Transfer the list of preallocated barriers into the | |
977 | * i915_active rbtree, but only as proto-nodes. They will be | |
978 | * populated by i915_request_add_active_barriers() to point to the | |
979 | * request that will eventually release them. | |
980 | */ | |
d8af05ff CW |
981 | llist_for_each_safe(pos, next, take_preallocated_barriers(ref)) { |
982 | struct active_node *node = barrier_from_ll(pos); | |
983 | struct intel_engine_cs *engine = barrier_to_engine(node); | |
ce476c80 CW |
984 | struct rb_node **p, *parent; |
985 | ||
07779a76 CW |
986 | spin_lock_irqsave_nested(&ref->tree_lock, flags, |
987 | SINGLE_DEPTH_NESTING); | |
ce476c80 CW |
988 | parent = NULL; |
989 | p = &ref->tree.rb_node; | |
990 | while (*p) { | |
d8af05ff CW |
991 | struct active_node *it; |
992 | ||
ce476c80 | 993 | parent = *p; |
d8af05ff CW |
994 | |
995 | it = rb_entry(parent, struct active_node, node); | |
996 | if (it->timeline < node->timeline) | |
ce476c80 CW |
997 | p = &parent->rb_right; |
998 | else | |
999 | p = &parent->rb_left; | |
1000 | } | |
1001 | rb_link_node(&node->node, parent, p); | |
1002 | rb_insert_color(&node->node, &ref->tree); | |
07779a76 | 1003 | spin_unlock_irqrestore(&ref->tree_lock, flags); |
ce476c80 | 1004 | |
b7234840 | 1005 | GEM_BUG_ON(!intel_engine_pm_is_awake(engine)); |
d8af05ff | 1006 | llist_add(barrier_to_ll(node), &engine->barrier_tasks); |
edee52c9 | 1007 | intel_engine_pm_put_delay(engine, 1); |
ce476c80 | 1008 | } |
ce476c80 CW |
1009 | } |
1010 | ||
df9f85d8 CW |
1011 | static struct dma_fence **ll_to_fence_slot(struct llist_node *node) |
1012 | { | |
1013 | return __active_fence_slot(&barrier_from_ll(node)->base); | |
1014 | } | |
1015 | ||
d8af05ff | 1016 | void i915_request_add_active_barriers(struct i915_request *rq) |
ce476c80 CW |
1017 | { |
1018 | struct intel_engine_cs *engine = rq->engine; | |
1019 | struct llist_node *node, *next; | |
b1e3177b | 1020 | unsigned long flags; |
ce476c80 | 1021 | |
e6ba7648 | 1022 | GEM_BUG_ON(!intel_context_is_barrier(rq->context)); |
d8af05ff | 1023 | GEM_BUG_ON(intel_engine_is_virtual(engine)); |
d19d71fc | 1024 | GEM_BUG_ON(i915_request_timeline(rq) != engine->kernel_context->timeline); |
d8af05ff | 1025 | |
b1e3177b CW |
1026 | node = llist_del_all(&engine->barrier_tasks); |
1027 | if (!node) | |
1028 | return; | |
d8af05ff CW |
1029 | /* |
1030 | * Attach the list of proto-fences to the in-flight request such | |
1031 | * that the parent i915_active will be released when this request | |
1032 | * is retired. | |
1033 | */ | |
b1e3177b CW |
1034 | spin_lock_irqsave(&rq->lock, flags); |
1035 | llist_for_each_safe(node, next, node) { | |
df9f85d8 CW |
1036 | /* serialise with reuse_idle_barrier */ |
1037 | smp_store_mb(*ll_to_fence_slot(node), &rq->fence); | |
b1e3177b CW |
1038 | list_add_tail((struct list_head *)node, &rq->fence.cb_list); |
1039 | } | |
1040 | spin_unlock_irqrestore(&rq->lock, flags); | |
1041 | } | |
1042 | ||
b1e3177b CW |
1043 | /* |
1044 | * __i915_active_fence_set: Update the last active fence along its timeline | |
1045 | * @active: the active tracker | |
1046 | * @fence: the new fence (under construction) | |
1047 | * | |
1048 | * Records the new @fence as the last active fence along its timeline in | |
1049 | * this active tracker, moving the tracking callbacks from the previous | |
1050 | * fence onto this one. Returns the previous fence (if not already completed), | |
1051 | * which the caller must ensure is executed before the new fence. To ensure | |
1052 | * that the order of fences within the timeline of the i915_active_fence is | |
df9f85d8 | 1053 | * understood, it should be locked by the caller. |
b1e3177b CW |
1054 | */ |
1055 | struct dma_fence * | |
1056 | __i915_active_fence_set(struct i915_active_fence *active, | |
1057 | struct dma_fence *fence) | |
1058 | { | |
1059 | struct dma_fence *prev; | |
1060 | unsigned long flags; | |
1061 | ||
df9f85d8 CW |
1062 | if (fence == rcu_access_pointer(active->fence)) |
1063 | return fence; | |
1064 | ||
b1e3177b CW |
1065 | GEM_BUG_ON(test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)); |
1066 | ||
df9f85d8 CW |
1067 | /* |
1068 | * Consider that we have two threads arriving (A and B), with | |
1069 | * C already resident as the active->fence. | |
1070 | * | |
1071 | * A does the xchg first, and so it sees C or NULL depending | |
1072 | * on the timing of the interrupt handler. If it is NULL, the | |
1073 | * previous fence must have been signaled and we know that | |
1074 | * we are first on the timeline. If it is still present, | |
1075 | * we acquire the lock on that fence and serialise with the interrupt | |
1076 | * handler, in the process removing it from any future interrupt | |
1077 | * callback. A will then wait on C before executing (if present). | |
1078 | * | |
1079 | * As B is second, it sees A as the previous fence and so waits for | |
1080 | * it to complete its transition and takes over the occupancy for | |
1081 | * itself -- remembering that it needs to wait on A before executing. | |
1082 | * | |
1083 | * Note the strong ordering of the timeline also provides consistent | |
1084 | * nesting rules for the fence->lock; the inner lock is always the | |
1085 | * older lock. | |
1086 | */ | |
1087 | spin_lock_irqsave(fence->lock, flags); | |
1088 | prev = xchg(__active_fence_slot(active), fence); | |
b1e3177b CW |
1089 | if (prev) { |
1090 | GEM_BUG_ON(prev == fence); | |
1091 | spin_lock_nested(prev->lock, SINGLE_DEPTH_NESTING); | |
1092 | __list_del_entry(&active->cb.node); | |
1093 | spin_unlock(prev->lock); /* serialise with prev->cb_list */ | |
d8af05ff | 1094 | } |
b1e3177b | 1095 | list_add_tail(&active->cb.node, &fence->cb_list); |
b1e3177b CW |
1096 | spin_unlock_irqrestore(fence->lock, flags); |
1097 | ||
1098 | return prev; | |
ce476c80 CW |
1099 | } |
1100 | ||
b1e3177b CW |
1101 | int i915_active_fence_set(struct i915_active_fence *active, |
1102 | struct i915_request *rq) | |
21950ee7 | 1103 | { |
b1e3177b CW |
1104 | struct dma_fence *fence; |
1105 | int err = 0; | |
21950ee7 | 1106 | |
b1e3177b CW |
1107 | /* Must maintain timeline ordering wrt previous active requests */ |
1108 | rcu_read_lock(); | |
1109 | fence = __i915_active_fence_set(active, &rq->fence); | |
1110 | if (fence) /* but the previous fence may not belong to that timeline! */ | |
1111 | fence = dma_fence_get_rcu(fence); | |
1112 | rcu_read_unlock(); | |
1113 | if (fence) { | |
1114 | err = i915_request_await_dma_fence(rq, fence); | |
1115 | dma_fence_put(fence); | |
1116 | } | |
21950ee7 | 1117 | |
b1e3177b | 1118 | return err; |
21950ee7 CW |
1119 | } |
1120 | ||
b1e3177b | 1121 | void i915_active_noop(struct dma_fence *fence, struct dma_fence_cb *cb) |
21950ee7 | 1122 | { |
df9f85d8 | 1123 | active_fence_cb(fence, cb); |
21950ee7 CW |
1124 | } |
1125 | ||
229007e0 CW |
1126 | struct auto_active { |
1127 | struct i915_active base; | |
1128 | struct kref ref; | |
1129 | }; | |
1130 | ||
1131 | struct i915_active *i915_active_get(struct i915_active *ref) | |
1132 | { | |
1133 | struct auto_active *aa = container_of(ref, typeof(*aa), base); | |
1134 | ||
1135 | kref_get(&aa->ref); | |
1136 | return &aa->base; | |
1137 | } | |
1138 | ||
1139 | static void auto_release(struct kref *ref) | |
1140 | { | |
1141 | struct auto_active *aa = container_of(ref, typeof(*aa), ref); | |
1142 | ||
1143 | i915_active_fini(&aa->base); | |
1144 | kfree(aa); | |
1145 | } | |
1146 | ||
1147 | void i915_active_put(struct i915_active *ref) | |
1148 | { | |
1149 | struct auto_active *aa = container_of(ref, typeof(*aa), base); | |
1150 | ||
1151 | kref_put(&aa->ref, auto_release); | |
1152 | } | |
1153 | ||
1154 | static int auto_active(struct i915_active *ref) | |
1155 | { | |
1156 | i915_active_get(ref); | |
1157 | return 0; | |
1158 | } | |
1159 | ||
1160 | static void auto_retire(struct i915_active *ref) | |
1161 | { | |
1162 | i915_active_put(ref); | |
1163 | } | |
1164 | ||
1165 | struct i915_active *i915_active_create(void) | |
1166 | { | |
1167 | struct auto_active *aa; | |
1168 | ||
1169 | aa = kmalloc(sizeof(*aa), GFP_KERNEL); | |
1170 | if (!aa) | |
1171 | return NULL; | |
1172 | ||
1173 | kref_init(&aa->ref); | |
1174 | i915_active_init(&aa->base, auto_active, auto_retire); | |
1175 | ||
1176 | return &aa->base; | |
1177 | } | |
1178 | ||
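The auto_active wrapper above ties the tracker's lifetime to a kref: auto_active() takes a reference while the tracker is busy and auto_retire() drops it, so the allocation can safely outlive its creator. A minimal usage sketch, with example_track_auto(), the timeline index and the fence as hypothetical stand-ins:

```c
/* Hypothetical caller: track @fence, then drop the creation reference */
static int example_track_auto(u64 idx, struct dma_fence *fence)
{
	struct i915_active *ref;
	int err;

	ref = i915_active_create();
	if (!ref)
		return -ENOMEM;

	/* auto_active() holds an extra kref while the tracker is busy */
	err = i915_active_ref(ref, idx, fence);

	/* auto_release() frees the tracker once idle and unreferenced */
	i915_active_put(ref);
	return err;
}
```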
64d6c500 CW |
1179 | #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) |
1180 | #include "selftests/i915_active.c" | |
1181 | #endif | |
5f5c139d | 1182 | |
103b76ee | 1183 | static void i915_global_active_shrink(void) |
5f5c139d | 1184 | { |
103b76ee | 1185 | kmem_cache_shrink(global.slab_cache); |
5f5c139d CW |
1186 | } |
1187 | ||
103b76ee | 1188 | static void i915_global_active_exit(void) |
32eb6bcf | 1189 | { |
103b76ee | 1190 | kmem_cache_destroy(global.slab_cache); |
32eb6bcf CW |
1191 | } |
1192 | ||
103b76ee CW |
1193 | static struct i915_global_active global = { { |
1194 | .shrink = i915_global_active_shrink, | |
1195 | .exit = i915_global_active_exit, | |
1196 | } }; | |
1197 | ||
1198 | int __init i915_global_active_init(void) | |
5f5c139d | 1199 | { |
103b76ee CW |
1200 | global.slab_cache = KMEM_CACHE(active_node, SLAB_HWCACHE_ALIGN); |
1201 | if (!global.slab_cache) | |
1202 | return -ENOMEM; | |
1203 | ||
1204 | i915_global_register(&global.base); | |
1205 | return 0; | |
5f5c139d | 1206 | } |