drm/i915: Keep the most recently used active-fence upon discard
[linux-2.6-block.git] drivers/gpu/drm/i915/i915_active.c
/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright © 2019 Intel Corporation
 */

#include <linux/debugobjects.h>

#include "gt/intel_context.h"
#include "gt/intel_engine_heartbeat.h"
#include "gt/intel_engine_pm.h"
#include "gt/intel_ring.h"

#include "i915_drv.h"
#include "i915_active.h"
#include "i915_globals.h"

/*
 * Active refs memory management
 *
 * To be more economical with memory, we reap all the i915_active trees as
 * they idle (when we know the active requests are inactive) and allocate the
 * nodes from a local slab cache to hopefully reduce the fragmentation.
 */
static struct i915_global_active {
	struct i915_global base;
	struct kmem_cache *slab_cache;
} global;

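/*
 * An active_node tracks the most recent fence submitted along one timeline
 * (keyed by that timeline's fence context id) within an i915_active. The
 * nodes live in the rbtree below, with the most recently used node cached
 * in ref->cache to short-circuit the common single-timeline lookup.
 */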
struct active_node {
	struct rb_node node;
	struct i915_active_fence base;
	struct i915_active *ref;
	u64 timeline;
};

#define fetch_node(x) rb_entry(READ_ONCE(x), typeof(struct active_node), node)

static inline struct active_node *
node_from_active(struct i915_active_fence *active)
{
	return container_of(active, struct active_node, base);
}

#define take_preallocated_barriers(x) llist_del_all(&(x)->preallocated_barriers)

static inline bool is_barrier(const struct i915_active_fence *active)
{
	return IS_ERR(rcu_access_pointer(active->fence));
}

static inline struct llist_node *barrier_to_ll(struct active_node *node)
{
	GEM_BUG_ON(!is_barrier(&node->base));
	return (struct llist_node *)&node->base.cb.node;
}

static inline struct intel_engine_cs *
__barrier_to_engine(struct active_node *node)
{
	return (struct intel_engine_cs *)READ_ONCE(node->base.cb.node.prev);
}

static inline struct intel_engine_cs *
barrier_to_engine(struct active_node *node)
{
	GEM_BUG_ON(!is_barrier(&node->base));
	return __barrier_to_engine(node);
}

static inline struct active_node *barrier_from_ll(struct llist_node *x)
{
	return container_of((struct list_head *)x,
			    struct active_node, base.cb.node);
}

#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM) && IS_ENABLED(CONFIG_DEBUG_OBJECTS)

static void *active_debug_hint(void *addr)
{
	struct i915_active *ref = addr;

	return (void *)ref->active ?: (void *)ref->retire ?: (void *)ref;
}

static struct debug_obj_descr active_debug_desc = {
	.name = "i915_active",
	.debug_hint = active_debug_hint,
};

static void debug_active_init(struct i915_active *ref)
{
	debug_object_init(ref, &active_debug_desc);
}

static void debug_active_activate(struct i915_active *ref)
{
	lockdep_assert_held(&ref->tree_lock);
	if (!atomic_read(&ref->count)) /* before the first inc */
		debug_object_activate(ref, &active_debug_desc);
}

static void debug_active_deactivate(struct i915_active *ref)
{
	lockdep_assert_held(&ref->tree_lock);
	if (!atomic_read(&ref->count)) /* after the last dec */
		debug_object_deactivate(ref, &active_debug_desc);
}

static void debug_active_fini(struct i915_active *ref)
{
	debug_object_free(ref, &active_debug_desc);
}

static void debug_active_assert(struct i915_active *ref)
{
	debug_object_assert_init(ref, &active_debug_desc);
}

#else

static inline void debug_active_init(struct i915_active *ref) { }
static inline void debug_active_activate(struct i915_active *ref) { }
static inline void debug_active_deactivate(struct i915_active *ref) { }
static inline void debug_active_fini(struct i915_active *ref) { }
static inline void debug_active_assert(struct i915_active *ref) { }

#endif

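/*
 * Invoked when the final reference is dropped: while holding the tree_lock
 * we keep only the most recently used node (rebuilding the tree around it),
 * run the optional retire callback, wake any waiters and return all the
 * discarded nodes to the slab cache.
 */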
static void
__active_retire(struct i915_active *ref)
{
	struct rb_root root = RB_ROOT;
	struct active_node *it, *n;
	unsigned long flags;

	GEM_BUG_ON(i915_active_is_idle(ref));

	/* return the unused nodes to our slab cache -- flushing the allocator */
	if (!atomic_dec_and_lock_irqsave(&ref->count, &ref->tree_lock, flags))
		return;

	GEM_BUG_ON(rcu_access_pointer(ref->excl.fence));
	debug_active_deactivate(ref);

	/* Even if we have not used the cache, we may still have a barrier */
	if (!ref->cache)
		ref->cache = fetch_node(ref->tree.rb_node);

	/* Keep the MRU cached node for reuse */
	if (ref->cache) {
		/* Discard all other nodes in the tree */
		rb_erase(&ref->cache->node, &ref->tree);
		root = ref->tree;

		/* Rebuild the tree with only the cached node */
		rb_link_node(&ref->cache->node, NULL, &ref->tree.rb_node);
		rb_insert_color(&ref->cache->node, &ref->tree);
		GEM_BUG_ON(ref->tree.rb_node != &ref->cache->node);
	}

	spin_unlock_irqrestore(&ref->tree_lock, flags);

	/* After the final retire, the entire struct may be freed */
	if (ref->retire)
		ref->retire(ref);

	/* ... except if you wait on it, you must manage your own references! */
	wake_up_var(ref);

	/* Finally free the discarded timeline tree */
	rbtree_postorder_for_each_entry_safe(it, n, &root, node) {
		GEM_BUG_ON(i915_active_fence_isset(&it->base));
		kmem_cache_free(global.slab_cache, it);
	}
}

static void
active_work(struct work_struct *wrk)
{
	struct i915_active *ref = container_of(wrk, typeof(*ref), work);

	GEM_BUG_ON(!atomic_read(&ref->count));
	if (atomic_add_unless(&ref->count, -1, 1))
		return;

	__active_retire(ref);
}

static void
active_retire(struct i915_active *ref)
{
	GEM_BUG_ON(!atomic_read(&ref->count));
	if (atomic_add_unless(&ref->count, -1, 1))
		return;

	if (ref->flags & I915_ACTIVE_RETIRE_SLEEPS) {
		queue_work(system_unbound_wq, &ref->work);
		return;
	}

	__active_retire(ref);
}

static inline struct dma_fence **
__active_fence_slot(struct i915_active_fence *active)
{
	return (struct dma_fence ** __force)&active->fence;
}

static inline bool
active_fence_cb(struct dma_fence *fence, struct dma_fence_cb *cb)
{
	struct i915_active_fence *active =
		container_of(cb, typeof(*active), cb);

	return cmpxchg(__active_fence_slot(active), fence, NULL) == fence;
}

static void
node_retire(struct dma_fence *fence, struct dma_fence_cb *cb)
{
	if (active_fence_cb(fence, cb))
		active_retire(container_of(cb, struct active_node, base.cb)->ref);
}

static void
excl_retire(struct dma_fence *fence, struct dma_fence_cb *cb)
{
	if (active_fence_cb(fence, cb))
		active_retire(container_of(cb, struct i915_active, excl.cb));
}

static struct active_node *__active_lookup(struct i915_active *ref, u64 idx)
{
	struct active_node *it;

	/*
	 * We track the most recently used timeline to skip an rbtree search
	 * for the common case; under typical loads we never need the rbtree
	 * at all. We can reuse the last slot if it is empty, that is
	 * after the previous activity has been retired, or if it matches the
	 * current timeline.
	 */
	it = READ_ONCE(ref->cache);
	if (it && it->timeline == idx)
		return it;

	BUILD_BUG_ON(offsetof(typeof(*it), node));

	/* While active, the tree can only be built; not destroyed */
	GEM_BUG_ON(i915_active_is_idle(ref));

	it = fetch_node(ref->tree.rb_node);
	while (it) {
		if (it->timeline < idx) {
			it = fetch_node(it->node.rb_right);
		} else if (it->timeline > idx) {
			it = fetch_node(it->node.rb_left);
		} else {
			WRITE_ONCE(ref->cache, it);
			break;
		}
	}

	/* NB: If the tree rotated beneath us, we may miss our target. */
	return it;
}

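/*
 * Find, or create, the fence slot for the given timeline. The replacement
 * node is preallocated outside the tree_lock so that the insertion itself
 * never has to allocate; if another thread built the node first, the
 * preallocation is simply returned to the slab cache.
 */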
static struct i915_active_fence *
active_instance(struct i915_active *ref, u64 idx)
{
	struct active_node *node, *prealloc;
	struct rb_node **p, *parent;

	node = __active_lookup(ref, idx);
	if (likely(node))
		return &node->base;

	/* Preallocate a replacement, just in case */
	prealloc = kmem_cache_alloc(global.slab_cache, GFP_KERNEL);
	if (!prealloc)
		return NULL;

	spin_lock_irq(&ref->tree_lock);
	GEM_BUG_ON(i915_active_is_idle(ref));

	parent = NULL;
	p = &ref->tree.rb_node;
	while (*p) {
		parent = *p;

		node = rb_entry(parent, struct active_node, node);
		if (node->timeline == idx) {
			kmem_cache_free(global.slab_cache, prealloc);
			goto out;
		}

		if (node->timeline < idx)
			p = &parent->rb_right;
		else
			p = &parent->rb_left;
	}

	node = prealloc;
	__i915_active_fence_init(&node->base, NULL, node_retire);
	node->ref = ref;
	node->timeline = idx;

	rb_link_node(&node->node, parent, p);
	rb_insert_color(&node->node, &ref->tree);

out:
	WRITE_ONCE(ref->cache, node);
	spin_unlock_irq(&ref->tree_lock);

	return &node->base;
}

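/*
 * A minimal usage sketch (illustrative only; my_active, my_retire and the
 * timeline index are placeholders supplied by the caller, and error
 * handling is elided):
 *
 *	struct i915_active ref;
 *
 *	i915_active_init(&ref, my_active, my_retire);
 *	err = i915_active_ref(&ref, tl->fence_context, &rq->fence);
 *	...
 *	err = __i915_active_wait(&ref, TASK_INTERRUPTIBLE);
 *	i915_active_fini(&ref);
 *
 * my_active() is called on the first acquire and my_retire() once the last
 * tracked fence has signaled and the final reference has been dropped.
 */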
void __i915_active_init(struct i915_active *ref,
			int (*active)(struct i915_active *ref),
			void (*retire)(struct i915_active *ref),
			struct lock_class_key *mkey,
			struct lock_class_key *wkey)
{
	unsigned long bits;

	debug_active_init(ref);

	ref->flags = 0;
	ref->active = active;
	ref->retire = ptr_unpack_bits(retire, &bits, 2);
	if (bits & I915_ACTIVE_MAY_SLEEP)
		ref->flags |= I915_ACTIVE_RETIRE_SLEEPS;

	spin_lock_init(&ref->tree_lock);
	ref->tree = RB_ROOT;
	ref->cache = NULL;

	init_llist_head(&ref->preallocated_barriers);
	atomic_set(&ref->count, 0);
	__mutex_init(&ref->mutex, "i915_active", mkey);
	__i915_active_fence_init(&ref->excl, NULL, excl_retire);
	INIT_WORK(&ref->work, active_work);
#if IS_ENABLED(CONFIG_LOCKDEP)
	lockdep_init_map(&ref->work.lockdep_map, "i915_active.work", wkey, 0);
#endif
}

static bool ____active_del_barrier(struct i915_active *ref,
				   struct active_node *node,
				   struct intel_engine_cs *engine)
{
	struct llist_node *head = NULL, *tail = NULL;
	struct llist_node *pos, *next;

	GEM_BUG_ON(node->timeline != engine->kernel_context->timeline->fence_context);

	/*
	 * Rebuild the llist excluding our node. We may perform this
	 * outside of the kernel_context timeline mutex and so someone
	 * else may be manipulating the engine->barrier_tasks, in
	 * which case either we or they will be upset :)
	 *
	 * A second __active_del_barrier() will report failure to claim
	 * the active_node and the caller will just shrug and know not to
	 * claim ownership of its node.
	 *
	 * A concurrent i915_request_add_active_barriers() will miss adding
	 * any of the tasks, but we will try again on the next -- and since
	 * we are actively using the barrier, we know that there will be
	 * at least another opportunity when we idle.
	 */
	llist_for_each_safe(pos, next, llist_del_all(&engine->barrier_tasks)) {
		if (node == barrier_from_ll(pos)) {
			node = NULL;
			continue;
		}

		pos->next = head;
		head = pos;
		if (!tail)
			tail = pos;
	}
	if (head)
		llist_add_batch(head, tail, &engine->barrier_tasks);

	return !node;
}

static bool
__active_del_barrier(struct i915_active *ref, struct active_node *node)
{
	return ____active_del_barrier(ref, node, barrier_to_engine(node));
}

static bool
replace_barrier(struct i915_active *ref, struct i915_active_fence *active)
{
	if (!is_barrier(active)) /* proto-node used by our idle barrier? */
		return false;

	/*
	 * This request is on the kernel_context timeline, and so
	 * we can use it to substitute for the pending idle-barrier
	 * request that we want to emit on the kernel_context.
	 */
	__active_del_barrier(ref, node_from_active(active));
	return true;
}

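/*
 * Track @fence as the most recent activity on timeline @idx. If an idle
 * barrier currently occupies the slot, it is cancelled and replaced;
 * otherwise the previous fence for that timeline is simply superseded.
 */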
int i915_active_ref(struct i915_active *ref, u64 idx, struct dma_fence *fence)
{
	struct i915_active_fence *active;
	int err;

	/* Prevent reaping in case we malloc/wait while building the tree */
	err = i915_active_acquire(ref);
	if (err)
		return err;

	active = active_instance(ref, idx);
	if (!active) {
		err = -ENOMEM;
		goto out;
	}

	if (replace_barrier(ref, active)) {
		RCU_INIT_POINTER(active->fence, NULL);
		atomic_dec(&ref->count);
	}
	if (!__i915_active_fence_set(active, fence))
		__i915_active_acquire(ref);

out:
	i915_active_release(ref);
	return err;
}

static struct dma_fence *
__i915_active_set_fence(struct i915_active *ref,
			struct i915_active_fence *active,
			struct dma_fence *fence)
{
	struct dma_fence *prev;

	if (replace_barrier(ref, active)) {
		RCU_INIT_POINTER(active->fence, fence);
		return NULL;
	}

	rcu_read_lock();
	prev = __i915_active_fence_set(active, fence);
	if (prev)
		prev = dma_fence_get_rcu(prev);
	else
		__i915_active_acquire(ref);
	rcu_read_unlock();

	return prev;
}

static struct i915_active_fence *
__active_fence(struct i915_active *ref, u64 idx)
{
	struct active_node *it;

	it = __active_lookup(ref, idx);
	if (unlikely(!it)) { /* Contention with parallel tree builders! */
		spin_lock_irq(&ref->tree_lock);
		it = __active_lookup(ref, idx);
		spin_unlock_irq(&ref->tree_lock);
	}
	GEM_BUG_ON(!it); /* slot must be preallocated */

	return &it->base;
}

struct dma_fence *
__i915_active_ref(struct i915_active *ref, u64 idx, struct dma_fence *fence)
{
	/* Only valid while active, see i915_active_acquire_for_context() */
	return __i915_active_set_fence(ref, __active_fence(ref, idx), fence);
}

struct dma_fence *
i915_active_set_exclusive(struct i915_active *ref, struct dma_fence *f)
{
	/* We expect the caller to manage the exclusive timeline ordering */
	return __i915_active_set_fence(ref, &ref->excl, f);
}

bool i915_active_acquire_if_busy(struct i915_active *ref)
{
	debug_active_assert(ref);
	return atomic_add_unless(&ref->count, 1, 0);
}

static void __i915_active_activate(struct i915_active *ref)
{
	spin_lock_irq(&ref->tree_lock); /* __active_retire() */
	if (!atomic_fetch_inc(&ref->count))
		debug_active_activate(ref);
	spin_unlock_irq(&ref->tree_lock);
}

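/*
 * Take a reference for the duration of a busy phase. The first acquisition
 * activates the tracker, invoking the optional ref->active() callback under
 * ref->mutex before the reference count becomes visible to others.
 */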
int i915_active_acquire(struct i915_active *ref)
{
	int err;

	if (i915_active_acquire_if_busy(ref))
		return 0;

	if (!ref->active) {
		__i915_active_activate(ref);
		return 0;
	}

	err = mutex_lock_interruptible(&ref->mutex);
	if (err)
		return err;

	if (likely(!i915_active_acquire_if_busy(ref))) {
		err = ref->active(ref);
		if (!err)
			__i915_active_activate(ref);
	}

	mutex_unlock(&ref->mutex);

	return err;
}

int i915_active_acquire_for_context(struct i915_active *ref, u64 idx)
{
	struct i915_active_fence *active;
	int err;

	err = i915_active_acquire(ref);
	if (err)
		return err;

	active = active_instance(ref, idx);
	if (!active) {
		i915_active_release(ref);
		return -ENOMEM;
	}

	return 0; /* return with active ref */
}

void i915_active_release(struct i915_active *ref)
{
	debug_active_assert(ref);
	active_retire(ref);
}

static void enable_signaling(struct i915_active_fence *active)
{
	struct dma_fence *fence;

	if (unlikely(is_barrier(active)))
		return;

	fence = i915_active_fence_get(active);
	if (!fence)
		return;

	dma_fence_enable_sw_signaling(fence);
	dma_fence_put(fence);
}

static int flush_barrier(struct active_node *it)
{
	struct intel_engine_cs *engine;

	if (likely(!is_barrier(&it->base)))
		return 0;

	engine = __barrier_to_engine(it);
	smp_rmb(); /* serialise with add_active_barriers */
	if (!is_barrier(&it->base))
		return 0;

	return intel_engine_flush_barriers(engine);
}

static int flush_lazy_signals(struct i915_active *ref)
{
	struct active_node *it, *n;
	int err = 0;

	enable_signaling(&ref->excl);
	rbtree_postorder_for_each_entry_safe(it, n, &ref->tree, node) {
		err = flush_barrier(it); /* unconnected idle barrier? */
		if (err)
			break;

		enable_signaling(&it->base);
	}

	return err;
}

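/*
 * Flush lazy signaling and wait until the tracker becomes idle. Returns
 * 0 immediately if there is nothing to wait upon; otherwise any pending
 * retirement work is flushed before returning so that ref->retire() has
 * completed by the time we report idleness.
 */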
int __i915_active_wait(struct i915_active *ref, int state)
{
	int err;

	might_sleep();

	if (!i915_active_acquire_if_busy(ref))
		return 0;

	/* Any fence added after the wait begins will not be auto-signaled */
	err = flush_lazy_signals(ref);
	i915_active_release(ref);
	if (err)
		return err;

	if (!i915_active_is_idle(ref) &&
	    ___wait_var_event(ref, i915_active_is_idle(ref),
			      state, 0, 0, schedule()))
		return -EINTR;

	flush_work(&ref->work);
	return 0;
}

static int __await_active(struct i915_active_fence *active,
			  int (*fn)(void *arg, struct dma_fence *fence),
			  void *arg)
{
	struct dma_fence *fence;

	if (is_barrier(active)) /* XXX flush the barrier? */
		return 0;

	fence = i915_active_fence_get(active);
	if (fence) {
		int err;

		err = fn(arg, fence);
		dma_fence_put(fence);
		if (err < 0)
			return err;
	}

	return 0;
}

struct wait_barrier {
	struct wait_queue_entry base;
	struct i915_active *ref;
};

static int
barrier_wake(wait_queue_entry_t *wq, unsigned int mode, int flags, void *key)
{
	struct wait_barrier *wb = container_of(wq, typeof(*wb), base);

	if (i915_active_is_idle(wb->ref)) {
		list_del(&wq->entry);
		i915_sw_fence_complete(wq->private);
		kfree(wq);
	}

	return 0;
}

static int __await_barrier(struct i915_active *ref, struct i915_sw_fence *fence)
{
	struct wait_barrier *wb;

	wb = kmalloc(sizeof(*wb), GFP_KERNEL);
	if (unlikely(!wb))
		return -ENOMEM;

	GEM_BUG_ON(i915_active_is_idle(ref));
	if (!i915_sw_fence_await(fence)) {
		kfree(wb);
		return -EINVAL;
	}

	wb->base.flags = 0;
	wb->base.func = barrier_wake;
	wb->base.private = fence;
	wb->ref = ref;

	add_wait_queue(__var_waitqueue(ref), &wb->base);
	return 0;
}

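/*
 * Common helper for awaiting an i915_active: depending on @flags we await
 * the exclusive fence, every tracked timeline and/or the completion of all
 * barriers, using @fn to attach each fence to the waiter passed in @arg.
 */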
static int await_active(struct i915_active *ref,
			unsigned int flags,
			int (*fn)(void *arg, struct dma_fence *fence),
			void *arg, struct i915_sw_fence *barrier)
{
	int err = 0;

	if (!i915_active_acquire_if_busy(ref))
		return 0;

	if (flags & I915_ACTIVE_AWAIT_EXCL &&
	    rcu_access_pointer(ref->excl.fence)) {
		err = __await_active(&ref->excl, fn, arg);
		if (err)
			goto out;
	}

	if (flags & I915_ACTIVE_AWAIT_ACTIVE) {
		struct active_node *it, *n;

		rbtree_postorder_for_each_entry_safe(it, n, &ref->tree, node) {
			err = __await_active(&it->base, fn, arg);
			if (err)
				goto out;
		}
	}

	if (flags & I915_ACTIVE_AWAIT_BARRIER) {
		err = flush_lazy_signals(ref);
		if (err)
			goto out;

		err = __await_barrier(ref, barrier);
		if (err)
			goto out;
	}

out:
	i915_active_release(ref);
	return err;
}

static int rq_await_fence(void *arg, struct dma_fence *fence)
{
	return i915_request_await_dma_fence(arg, fence);
}

int i915_request_await_active(struct i915_request *rq,
			      struct i915_active *ref,
			      unsigned int flags)
{
	return await_active(ref, flags, rq_await_fence, rq, &rq->submit);
}

static int sw_await_fence(void *arg, struct dma_fence *fence)
{
	return i915_sw_fence_await_dma_fence(arg, fence, 0,
					     GFP_NOWAIT | __GFP_NOWARN);
}

int i915_sw_fence_await_active(struct i915_sw_fence *fence,
			       struct i915_active *ref,
			       unsigned int flags)
{
	return await_active(ref, flags, sw_await_fence, fence, fence);
}

void i915_active_fini(struct i915_active *ref)
{
	debug_active_fini(ref);
	GEM_BUG_ON(atomic_read(&ref->count));
	GEM_BUG_ON(work_pending(&ref->work));
	mutex_destroy(&ref->mutex);

	if (ref->cache)
		kmem_cache_free(global.slab_cache, ref->cache);
}

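/*
 * Idle barriers are proto-nodes for an engine's kernel_context timeline:
 * their fence slot holds ERR_PTR(-EAGAIN) and the owning engine is stashed
 * in the callback node until i915_request_add_active_barriers() binds them
 * to a real request.
 */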
static inline bool is_idle_barrier(struct active_node *node, u64 idx)
{
	return node->timeline == idx && !i915_active_fence_isset(&node->base);
}

static struct active_node *reuse_idle_barrier(struct i915_active *ref, u64 idx)
{
	struct rb_node *prev, *p;

	if (RB_EMPTY_ROOT(&ref->tree))
		return NULL;

	spin_lock_irq(&ref->tree_lock);
	GEM_BUG_ON(i915_active_is_idle(ref));

	/*
	 * Try to reuse any existing barrier nodes already allocated for this
	 * i915_active, due to overlapping active phases there is likely a
	 * node kept alive (as we reuse before parking). We prefer to reuse
	 * completely idle barriers (less hassle in manipulating the llists),
	 * but otherwise any will do.
	 */
	if (ref->cache && is_idle_barrier(ref->cache, idx)) {
		p = &ref->cache->node;
		goto match;
	}

	prev = NULL;
	p = ref->tree.rb_node;
	while (p) {
		struct active_node *node =
			rb_entry(p, struct active_node, node);

		if (is_idle_barrier(node, idx))
			goto match;

		prev = p;
		if (node->timeline < idx)
			p = p->rb_right;
		else
			p = p->rb_left;
	}

	/*
	 * No quick match, but we did find the leftmost rb_node for the
	 * kernel_context. Walk the rb_tree in-order to see if there were
	 * any idle-barriers on this timeline that we missed, or just use
	 * the first pending barrier.
	 */
	for (p = prev; p; p = rb_next(p)) {
		struct active_node *node =
			rb_entry(p, struct active_node, node);
		struct intel_engine_cs *engine;

		if (node->timeline > idx)
			break;

		if (node->timeline < idx)
			continue;

		if (is_idle_barrier(node, idx))
			goto match;

		/*
		 * The list of pending barriers is protected by the
		 * kernel_context timeline, which notably we do not hold
		 * here. i915_request_add_active_barriers() may consume
		 * the barrier before we claim it, so we have to check
		 * for success.
		 */
		engine = __barrier_to_engine(node);
		smp_rmb(); /* serialise with add_active_barriers */
		if (is_barrier(&node->base) &&
		    ____active_del_barrier(ref, node, engine))
			goto match;
	}

	spin_unlock_irq(&ref->tree_lock);

	return NULL;

match:
	rb_erase(p, &ref->tree); /* Hide from waits and sibling allocations */
	if (p == &ref->cache->node)
		WRITE_ONCE(ref->cache, NULL);
	spin_unlock_irq(&ref->tree_lock);

	return rb_entry(p, struct active_node, node);
}

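/*
 * Preallocate (or reuse) one barrier node for each physical engine backing
 * @engine so that the later i915_active_acquire_barrier() cannot fail.
 * Each node takes an engine-pm wakeref that is released once the barrier
 * has been handed over to the engine in i915_active_acquire_barrier().
 */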
int i915_active_acquire_preallocate_barrier(struct i915_active *ref,
					    struct intel_engine_cs *engine)
{
	intel_engine_mask_t tmp, mask = engine->mask;
	struct llist_node *first = NULL, *last = NULL;
	struct intel_gt *gt = engine->gt;

	GEM_BUG_ON(i915_active_is_idle(ref));

	/* Wait until the previous preallocation is completed */
	while (!llist_empty(&ref->preallocated_barriers))
		cond_resched();

	/*
	 * Preallocate a node for each physical engine supporting the target
	 * engine (remember virtual engines have more than one sibling).
	 * We can then use the preallocated nodes in
	 * i915_active_acquire_barrier()
	 */
	GEM_BUG_ON(!mask);
	for_each_engine_masked(engine, gt, mask, tmp) {
		u64 idx = engine->kernel_context->timeline->fence_context;
		struct llist_node *prev = first;
		struct active_node *node;

		node = reuse_idle_barrier(ref, idx);
		if (!node) {
			node = kmem_cache_alloc(global.slab_cache, GFP_KERNEL);
			if (!node)
				goto unwind;

			RCU_INIT_POINTER(node->base.fence, NULL);
			node->base.cb.func = node_retire;
			node->timeline = idx;
			node->ref = ref;
		}

		if (!i915_active_fence_isset(&node->base)) {
			/*
			 * Mark this as being *our* unconnected proto-node.
			 *
			 * Since this node is not in any list, and we have
			 * decoupled it from the rbtree, we can reuse the
			 * request to indicate this is an idle-barrier node
			 * and then we can use the rb_node and list pointers
			 * for our tracking of the pending barrier.
			 */
			RCU_INIT_POINTER(node->base.fence, ERR_PTR(-EAGAIN));
			node->base.cb.node.prev = (void *)engine;
			__i915_active_acquire(ref);
		}
		GEM_BUG_ON(rcu_access_pointer(node->base.fence) != ERR_PTR(-EAGAIN));

		GEM_BUG_ON(barrier_to_engine(node) != engine);
		first = barrier_to_ll(node);
		first->next = prev;
		if (!last)
			last = first;
		intel_engine_pm_get(engine);
	}

	GEM_BUG_ON(!llist_empty(&ref->preallocated_barriers));
	llist_add_batch(first, last, &ref->preallocated_barriers);

	return 0;

unwind:
	while (first) {
		struct active_node *node = barrier_from_ll(first);

		first = first->next;

		atomic_dec(&ref->count);
		intel_engine_pm_put(barrier_to_engine(node));

		kmem_cache_free(global.slab_cache, node);
	}
	return -ENOMEM;
}

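/*
 * Insert the preallocated proto-nodes into the rbtree and queue them on
 * their engines' barrier_tasks lists, ready to be bound to the next
 * kernel_context request.
 */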
void i915_active_acquire_barrier(struct i915_active *ref)
{
	struct llist_node *pos, *next;
	unsigned long flags;

	GEM_BUG_ON(i915_active_is_idle(ref));

	/*
	 * Transfer the list of preallocated barriers into the
	 * i915_active rbtree, but only as proto-nodes. They will be
	 * populated by i915_request_add_active_barriers() to point to the
	 * request that will eventually release them.
	 */
	llist_for_each_safe(pos, next, take_preallocated_barriers(ref)) {
		struct active_node *node = barrier_from_ll(pos);
		struct intel_engine_cs *engine = barrier_to_engine(node);
		struct rb_node **p, *parent;

		spin_lock_irqsave_nested(&ref->tree_lock, flags,
					 SINGLE_DEPTH_NESTING);
		parent = NULL;
		p = &ref->tree.rb_node;
		while (*p) {
			struct active_node *it;

			parent = *p;

			it = rb_entry(parent, struct active_node, node);
			if (it->timeline < node->timeline)
				p = &parent->rb_right;
			else
				p = &parent->rb_left;
		}
		rb_link_node(&node->node, parent, p);
		rb_insert_color(&node->node, &ref->tree);
		spin_unlock_irqrestore(&ref->tree_lock, flags);

		GEM_BUG_ON(!intel_engine_pm_is_awake(engine));
		llist_add(barrier_to_ll(node), &engine->barrier_tasks);
		intel_engine_pm_put_delay(engine, 1);
	}
}

static struct dma_fence **ll_to_fence_slot(struct llist_node *node)
{
	return __active_fence_slot(&barrier_from_ll(node)->base);
}

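/*
 * Bind all pending barrier tasks for this engine to @rq: each proto-node
 * becomes a callback on rq->fence, so the parent i915_active references
 * are released when this kernel_context request is retired.
 */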
void i915_request_add_active_barriers(struct i915_request *rq)
{
	struct intel_engine_cs *engine = rq->engine;
	struct llist_node *node, *next;
	unsigned long flags;

	GEM_BUG_ON(!intel_context_is_barrier(rq->context));
	GEM_BUG_ON(intel_engine_is_virtual(engine));
	GEM_BUG_ON(i915_request_timeline(rq) != engine->kernel_context->timeline);

	node = llist_del_all(&engine->barrier_tasks);
	if (!node)
		return;
	/*
	 * Attach the list of proto-fences to the in-flight request such
	 * that the parent i915_active will be released when this request
	 * is retired.
	 */
	spin_lock_irqsave(&rq->lock, flags);
	llist_for_each_safe(node, next, node) {
		/* serialise with reuse_idle_barrier */
		smp_store_mb(*ll_to_fence_slot(node), &rq->fence);
		list_add_tail((struct list_head *)node, &rq->fence.cb_list);
	}
	spin_unlock_irqrestore(&rq->lock, flags);
}

/*
 * __i915_active_fence_set: Update the last active fence along its timeline
 * @active: the active tracker
 * @fence: the new fence (under construction)
 *
 * Records the new @fence as the last active fence along its timeline in
 * this active tracker, moving the tracking callbacks from the previous
 * fence onto this one. Returns the previous fence (if not already completed),
 * which the caller must ensure is executed before the new fence. To ensure
 * that the order of fences within the timeline of the i915_active_fence is
 * understood, it should be locked by the caller.
 */
struct dma_fence *
__i915_active_fence_set(struct i915_active_fence *active,
			struct dma_fence *fence)
{
	struct dma_fence *prev;
	unsigned long flags;

	if (fence == rcu_access_pointer(active->fence))
		return fence;

	GEM_BUG_ON(test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags));

	/*
	 * Consider that we have two threads arriving (A and B), with
	 * C already resident as the active->fence.
	 *
	 * A does the xchg first, and so it sees C or NULL depending
	 * on the timing of the interrupt handler. If it is NULL, the
	 * previous fence must have been signaled and we know that
	 * we are first on the timeline. If it is still present,
	 * we acquire the lock on that fence and serialise with the interrupt
	 * handler, in the process removing it from any future interrupt
	 * callback. A will then wait on C before executing (if present).
	 *
	 * As B is second, it sees A as the previous fence and so waits for
	 * it to complete its transition and takes over the occupancy for
	 * itself -- remembering that it needs to wait on A before executing.
	 *
	 * Note the strong ordering of the timeline also provides consistent
	 * nesting rules for the fence->lock; the inner lock is always the
	 * older lock.
	 */
	spin_lock_irqsave(fence->lock, flags);
	prev = xchg(__active_fence_slot(active), fence);
	if (prev) {
		GEM_BUG_ON(prev == fence);
		spin_lock_nested(prev->lock, SINGLE_DEPTH_NESTING);
		__list_del_entry(&active->cb.node);
		spin_unlock(prev->lock); /* serialise with prev->cb_list */
	}
	list_add_tail(&active->cb.node, &fence->cb_list);
	spin_unlock_irqrestore(fence->lock, flags);

	return prev;
}

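/*
 * Set @rq as the last request on the @active timeline slot, awaiting any
 * still-pending previous fence so that ordering along the slot is
 * preserved.
 */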
int i915_active_fence_set(struct i915_active_fence *active,
			  struct i915_request *rq)
{
	struct dma_fence *fence;
	int err = 0;

	/* Must maintain timeline ordering wrt previous active requests */
	rcu_read_lock();
	fence = __i915_active_fence_set(active, &rq->fence);
	if (fence) /* but the previous fence may not belong to that timeline! */
		fence = dma_fence_get_rcu(fence);
	rcu_read_unlock();
	if (fence) {
		err = i915_request_await_dma_fence(rq, fence);
		dma_fence_put(fence);
	}

	return err;
}

void i915_active_noop(struct dma_fence *fence, struct dma_fence_cb *cb)
{
	active_fence_cb(fence, cb);
}

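/*
 * Self-managed trackers: i915_active_create() returns a kref-counted
 * i915_active that keeps itself alive while busy (auto_active grabs a
 * reference on activation, auto_retire drops it on retirement) and is
 * freed on the final i915_active_put(). A minimal sketch of the intended
 * usage (illustrative; error handling elided):
 *
 *	struct i915_active *ref;
 *
 *	ref = i915_active_create();
 *	if (!ref)
 *		return -ENOMEM;
 *
 *	err = i915_active_ref(ref, idx, fence);
 *	i915_active_put(ref);
 */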
struct auto_active {
	struct i915_active base;
	struct kref ref;
};

struct i915_active *i915_active_get(struct i915_active *ref)
{
	struct auto_active *aa = container_of(ref, typeof(*aa), base);

	kref_get(&aa->ref);
	return &aa->base;
}

static void auto_release(struct kref *ref)
{
	struct auto_active *aa = container_of(ref, typeof(*aa), ref);

	i915_active_fini(&aa->base);
	kfree(aa);
}

void i915_active_put(struct i915_active *ref)
{
	struct auto_active *aa = container_of(ref, typeof(*aa), base);

	kref_put(&aa->ref, auto_release);
}

static int auto_active(struct i915_active *ref)
{
	i915_active_get(ref);
	return 0;
}

static void auto_retire(struct i915_active *ref)
{
	i915_active_put(ref);
}

struct i915_active *i915_active_create(void)
{
	struct auto_active *aa;

	aa = kmalloc(sizeof(*aa), GFP_KERNEL);
	if (!aa)
		return NULL;

	kref_init(&aa->ref);
	i915_active_init(&aa->base, auto_active, auto_retire);

	return &aa->base;
}

#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftests/i915_active.c"
#endif

static void i915_global_active_shrink(void)
{
	kmem_cache_shrink(global.slab_cache);
}

static void i915_global_active_exit(void)
{
	kmem_cache_destroy(global.slab_cache);
}

static struct i915_global_active global = { {
	.shrink = i915_global_active_shrink,
	.exit = i915_global_active_exit,
} };

int __init i915_global_active_init(void)
{
	global.slab_cache = KMEM_CACHE(active_node, SLAB_HWCACHE_ALIGN);
	if (!global.slab_cache)
		return -ENOMEM;

	i915_global_register(&global.base);
	return 0;
}