linux-2.6-block.git: drivers/gpu/drm/i915/i915_active.c
/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright © 2019 Intel Corporation
 */

#include <linux/debugobjects.h>

#include "gt/intel_context.h"
#include "gt/intel_engine_heartbeat.h"
#include "gt/intel_engine_pm.h"
#include "gt/intel_ring.h"

#include "i915_drv.h"
#include "i915_active.h"
#include "i915_globals.h"

/*
 * Active refs memory management
 *
 * To be more economical with memory, we reap all the i915_active trees as
 * they idle (when we know the active requests are inactive) and allocate the
 * nodes from a local slab cache to hopefully reduce the fragmentation.
 */
static struct i915_global_active {
	struct i915_global base;
	struct kmem_cache *slab_cache;
} global;

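/*
 * Each active_node tracks the most recent fence on one timeline (keyed by
 * fence_context) and lives in the i915_active rbtree until the tracker idles.
 */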
struct active_node {
	struct i915_active_fence base;
	struct i915_active *ref;
	struct rb_node node;
	u64 timeline;
};

static inline struct active_node *
node_from_active(struct i915_active_fence *active)
{
	return container_of(active, struct active_node, base);
}

#define take_preallocated_barriers(x) llist_del_all(&(x)->preallocated_barriers)

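/*
 * Barrier proto-nodes are recognised by an ERR_PTR(-EAGAIN) in the fence slot;
 * their otherwise unused dma_fence_cb.node doubles as the llist entry on
 * engine->barrier_tasks, with its prev pointer stashing the engine itself.
 */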
static inline bool is_barrier(const struct i915_active_fence *active)
{
	return IS_ERR(rcu_access_pointer(active->fence));
}

static inline struct llist_node *barrier_to_ll(struct active_node *node)
{
	GEM_BUG_ON(!is_barrier(&node->base));
	return (struct llist_node *)&node->base.cb.node;
}

static inline struct intel_engine_cs *
__barrier_to_engine(struct active_node *node)
{
	return (struct intel_engine_cs *)READ_ONCE(node->base.cb.node.prev);
}

static inline struct intel_engine_cs *
barrier_to_engine(struct active_node *node)
{
	GEM_BUG_ON(!is_barrier(&node->base));
	return __barrier_to_engine(node);
}

static inline struct active_node *barrier_from_ll(struct llist_node *x)
{
	return container_of((struct list_head *)x,
			    struct active_node, base.cb.node);
}

#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM) && IS_ENABLED(CONFIG_DEBUG_OBJECTS)

static void *active_debug_hint(void *addr)
{
	struct i915_active *ref = addr;

	return (void *)ref->active ?: (void *)ref->retire ?: (void *)ref;
}

static struct debug_obj_descr active_debug_desc = {
	.name = "i915_active",
	.debug_hint = active_debug_hint,
};

static void debug_active_init(struct i915_active *ref)
{
	debug_object_init(ref, &active_debug_desc);
}

static void debug_active_activate(struct i915_active *ref)
{
	lockdep_assert_held(&ref->tree_lock);
	if (!atomic_read(&ref->count)) /* before the first inc */
		debug_object_activate(ref, &active_debug_desc);
}

static void debug_active_deactivate(struct i915_active *ref)
{
	lockdep_assert_held(&ref->tree_lock);
	if (!atomic_read(&ref->count)) /* after the last dec */
		debug_object_deactivate(ref, &active_debug_desc);
}

static void debug_active_fini(struct i915_active *ref)
{
	debug_object_free(ref, &active_debug_desc);
}

static void debug_active_assert(struct i915_active *ref)
{
	debug_object_assert_init(ref, &active_debug_desc);
}

#else

static inline void debug_active_init(struct i915_active *ref) { }
static inline void debug_active_activate(struct i915_active *ref) { }
static inline void debug_active_deactivate(struct i915_active *ref) { }
static inline void debug_active_fini(struct i915_active *ref) { }
static inline void debug_active_assert(struct i915_active *ref) { }

#endif

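/*
 * Drop the final reference: detach the rbtree under the tree_lock, invoke the
 * retire callback (after which the tracker may be freed), wake any waiters
 * and return the idle nodes to the slab cache.
 */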
static void
__active_retire(struct i915_active *ref)
{
	struct active_node *it, *n;
	struct rb_root root;
	unsigned long flags;

	GEM_BUG_ON(i915_active_is_idle(ref));

	/* return the unused nodes to our slabcache -- flushing the allocator */
	if (!atomic_dec_and_lock_irqsave(&ref->count, &ref->tree_lock, flags))
		return;

	GEM_BUG_ON(rcu_access_pointer(ref->excl.fence));
	debug_active_deactivate(ref);

	root = ref->tree;
	ref->tree = RB_ROOT;
	ref->cache = NULL;

	spin_unlock_irqrestore(&ref->tree_lock, flags);

	/* After the final retire, the entire struct may be freed */
	if (ref->retire)
		ref->retire(ref);

	/* ... except if you wait on it, you must manage your own references! */
	wake_up_var(ref);

	rbtree_postorder_for_each_entry_safe(it, n, &root, node) {
		GEM_BUG_ON(i915_active_fence_isset(&it->base));
		kmem_cache_free(global.slab_cache, it);
	}
}

static void
active_work(struct work_struct *wrk)
{
	struct i915_active *ref = container_of(wrk, typeof(*ref), work);

	GEM_BUG_ON(!atomic_read(&ref->count));
	if (atomic_add_unless(&ref->count, -1, 1))
		return;

	__active_retire(ref);
}

static void
active_retire(struct i915_active *ref)
{
	GEM_BUG_ON(!atomic_read(&ref->count));
	if (atomic_add_unless(&ref->count, -1, 1))
		return;

	if (ref->flags & I915_ACTIVE_RETIRE_SLEEPS) {
		queue_work(system_unbound_wq, &ref->work);
		return;
	}

	__active_retire(ref);
}

static inline struct dma_fence **
__active_fence_slot(struct i915_active_fence *active)
{
	return (struct dma_fence ** __force)&active->fence;
}

static inline bool
active_fence_cb(struct dma_fence *fence, struct dma_fence_cb *cb)
{
	struct i915_active_fence *active =
		container_of(cb, typeof(*active), cb);

	return cmpxchg(__active_fence_slot(active), fence, NULL) == fence;
}

static void
node_retire(struct dma_fence *fence, struct dma_fence_cb *cb)
{
	if (active_fence_cb(fence, cb))
		active_retire(container_of(cb, struct active_node, base.cb)->ref);
}

static void
excl_retire(struct dma_fence *fence, struct dma_fence_cb *cb)
{
	if (active_fence_cb(fence, cb))
		active_retire(container_of(cb, struct i915_active, excl.cb));
}

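/*
 * Find (or create) the active_node for the given timeline, using ref->cache
 * to short-circuit the rbtree walk in the common single-timeline case.
 */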
static struct i915_active_fence *
active_instance(struct i915_active *ref, struct intel_timeline *tl)
{
	struct active_node *node, *prealloc;
	struct rb_node **p, *parent;
	u64 idx = tl->fence_context;

	/*
	 * We track the most recently used timeline to skip an rbtree search
	 * for the common case, under typical loads we never need the rbtree
	 * at all. We can reuse the last slot if it is empty, that is
	 * after the previous activity has been retired, or if it matches the
	 * current timeline.
	 */
	node = READ_ONCE(ref->cache);
	if (node && node->timeline == idx)
		return &node->base;

	/* Preallocate a replacement, just in case */
	prealloc = kmem_cache_alloc(global.slab_cache, GFP_KERNEL);
	if (!prealloc)
		return NULL;

	spin_lock_irq(&ref->tree_lock);
	GEM_BUG_ON(i915_active_is_idle(ref));

	parent = NULL;
	p = &ref->tree.rb_node;
	while (*p) {
		parent = *p;

		node = rb_entry(parent, struct active_node, node);
		if (node->timeline == idx) {
			kmem_cache_free(global.slab_cache, prealloc);
			goto out;
		}

		if (node->timeline < idx)
			p = &parent->rb_right;
		else
			p = &parent->rb_left;
	}

	node = prealloc;
	__i915_active_fence_init(&node->base, NULL, node_retire);
	node->ref = ref;
	node->timeline = idx;

	rb_link_node(&node->node, parent, p);
	rb_insert_color(&node->node, &ref->tree);

out:
	ref->cache = node;
	spin_unlock_irq(&ref->tree_lock);

	BUILD_BUG_ON(offsetof(typeof(*node), base));
	return &node->base;
}

void __i915_active_init(struct i915_active *ref,
			int (*active)(struct i915_active *ref),
			void (*retire)(struct i915_active *ref),
			struct lock_class_key *mkey,
			struct lock_class_key *wkey)
{
	unsigned long bits;

	debug_active_init(ref);

	ref->flags = 0;
	ref->active = active;
	ref->retire = ptr_unpack_bits(retire, &bits, 2);
	if (bits & I915_ACTIVE_MAY_SLEEP)
		ref->flags |= I915_ACTIVE_RETIRE_SLEEPS;

	spin_lock_init(&ref->tree_lock);
	ref->tree = RB_ROOT;
	ref->cache = NULL;

	init_llist_head(&ref->preallocated_barriers);
	atomic_set(&ref->count, 0);
	__mutex_init(&ref->mutex, "i915_active", mkey);
	__i915_active_fence_init(&ref->excl, NULL, excl_retire);
	INIT_WORK(&ref->work, active_work);
#if IS_ENABLED(CONFIG_LOCKDEP)
	lockdep_init_map(&ref->work.lockdep_map, "i915_active.work", wkey, 0);
#endif
}

static bool ____active_del_barrier(struct i915_active *ref,
				   struct active_node *node,
				   struct intel_engine_cs *engine)
{
	struct llist_node *head = NULL, *tail = NULL;
	struct llist_node *pos, *next;

	GEM_BUG_ON(node->timeline != engine->kernel_context->timeline->fence_context);

	/*
	 * Rebuild the llist excluding our node. We may perform this
	 * outside of the kernel_context timeline mutex and so someone
	 * else may be manipulating the engine->barrier_tasks, in
	 * which case either we or they will be upset :)
	 *
	 * A second __active_del_barrier() will report failure to claim
	 * the active_node and the caller will just shrug and know not to
	 * claim ownership of its node.
	 *
	 * A concurrent i915_request_add_active_barriers() will miss adding
	 * any of the tasks, but we will try again on the next -- and since
	 * we are actively using the barrier, we know that there will be
	 * at least another opportunity when we idle.
	 */
	llist_for_each_safe(pos, next, llist_del_all(&engine->barrier_tasks)) {
		if (node == barrier_from_ll(pos)) {
			node = NULL;
			continue;
		}

		pos->next = head;
		head = pos;
		if (!tail)
			tail = pos;
	}
	if (head)
		llist_add_batch(head, tail, &engine->barrier_tasks);

	return !node;
}

static bool
__active_del_barrier(struct i915_active *ref, struct active_node *node)
{
	return ____active_del_barrier(ref, node, barrier_to_engine(node));
}

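/*
 * Record @fence as the most recent activity on @tl within this tracker,
 * replacing (and reusing) an idle-barrier proto-node if one is pending on
 * the kernel_context timeline.
 */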
int i915_active_ref(struct i915_active *ref,
		    struct intel_timeline *tl,
		    struct dma_fence *fence)
{
	struct i915_active_fence *active;
	int err;

	lockdep_assert_held(&tl->mutex);

	/* Prevent reaping in case we malloc/wait while building the tree */
	err = i915_active_acquire(ref);
	if (err)
		return err;

	active = active_instance(ref, tl);
	if (!active) {
		err = -ENOMEM;
		goto out;
	}

	if (is_barrier(active)) { /* proto-node used by our idle barrier */
		/*
		 * This request is on the kernel_context timeline, and so
		 * we can use it to substitute for the pending idle-barrier
		 * request that we want to emit on the kernel_context.
		 */
		__active_del_barrier(ref, node_from_active(active));
		RCU_INIT_POINTER(active->fence, NULL);
		atomic_dec(&ref->count);
	}
	if (!__i915_active_fence_set(active, fence))
		atomic_inc(&ref->count);

out:
	i915_active_release(ref);
	return err;
}

struct dma_fence *
i915_active_set_exclusive(struct i915_active *ref, struct dma_fence *f)
{
	struct dma_fence *prev;

	/* We expect the caller to manage the exclusive timeline ordering */
	GEM_BUG_ON(i915_active_is_idle(ref));

	rcu_read_lock();
	prev = __i915_active_fence_set(&ref->excl, f);
	if (prev)
		prev = dma_fence_get_rcu(prev);
	else
		atomic_inc(&ref->count);
	rcu_read_unlock();

	return prev;
}

bool i915_active_acquire_if_busy(struct i915_active *ref)
{
	debug_active_assert(ref);
	return atomic_add_unless(&ref->count, 1, 0);
}

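/*
 * On the 0 -> 1 transition, take ref->mutex so that ref->active() runs only
 * once; the count is then raised under the tree_lock to serialise with
 * __active_retire().
 */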
int i915_active_acquire(struct i915_active *ref)
{
	int err;

	if (i915_active_acquire_if_busy(ref))
		return 0;

	err = mutex_lock_interruptible(&ref->mutex);
	if (err)
		return err;

	if (likely(!i915_active_acquire_if_busy(ref))) {
		if (ref->active)
			err = ref->active(ref);
		if (!err) {
			spin_lock_irq(&ref->tree_lock); /* __active_retire() */
			debug_active_activate(ref);
			atomic_inc(&ref->count);
			spin_unlock_irq(&ref->tree_lock);
		}
	}

	mutex_unlock(&ref->mutex);

	return err;
}

void i915_active_release(struct i915_active *ref)
{
	debug_active_assert(ref);
	active_retire(ref);
}

static void enable_signaling(struct i915_active_fence *active)
{
	struct dma_fence *fence;

	if (unlikely(is_barrier(active)))
		return;

	fence = i915_active_fence_get(active);
	if (!fence)
		return;

	dma_fence_enable_sw_signaling(fence);
	dma_fence_put(fence);
}

static int flush_barrier(struct active_node *it)
{
	struct intel_engine_cs *engine;

	if (likely(!is_barrier(&it->base)))
		return 0;

	engine = __barrier_to_engine(it);
	smp_rmb(); /* serialise with add_active_barriers */
	if (!is_barrier(&it->base))
		return 0;

	return intel_engine_flush_barriers(engine);
}

static int flush_lazy_signals(struct i915_active *ref)
{
	struct active_node *it, *n;
	int err = 0;

	enable_signaling(&ref->excl);
	rbtree_postorder_for_each_entry_safe(it, n, &ref->tree, node) {
		err = flush_barrier(it); /* unconnected idle barrier? */
		if (err)
			break;

		enable_signaling(&it->base);
	}

	return err;
}

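/*
 * Wait (in the given task state) for all tracked fences to signal; fences
 * added after the wait begins are not flushed and so may not be waited upon.
 */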
int __i915_active_wait(struct i915_active *ref, int state)
{
	int err;

	might_sleep();

	if (!i915_active_acquire_if_busy(ref))
		return 0;

	/* Any fence added after the wait begins will not be auto-signaled */
	err = flush_lazy_signals(ref);
	i915_active_release(ref);
	if (err)
		return err;

	if (!i915_active_is_idle(ref) &&
	    ___wait_var_event(ref, i915_active_is_idle(ref),
			      state, 0, 0, schedule()))
		return -EINTR;

	flush_work(&ref->work);
	return 0;
}

static int __await_active(struct i915_active_fence *active,
			  int (*fn)(void *arg, struct dma_fence *fence),
			  void *arg)
{
	struct dma_fence *fence;

	if (is_barrier(active)) /* XXX flush the barrier? */
		return 0;

	fence = i915_active_fence_get(active);
	if (fence) {
		int err;

		err = fn(arg, fence);
		dma_fence_put(fence);
		if (err < 0)
			return err;
	}

	return 0;
}

struct wait_barrier {
	struct wait_queue_entry base;
	struct i915_active *ref;
};

static int
barrier_wake(wait_queue_entry_t *wq, unsigned int mode, int flags, void *key)
{
	struct wait_barrier *wb = container_of(wq, typeof(*wb), base);

	if (i915_active_is_idle(wb->ref)) {
		list_del(&wq->entry);
		i915_sw_fence_complete(wq->private);
		kfree(wq);
	}

	return 0;
}

static int __await_barrier(struct i915_active *ref, struct i915_sw_fence *fence)
{
	struct wait_barrier *wb;

	wb = kmalloc(sizeof(*wb), GFP_KERNEL);
	if (unlikely(!wb))
		return -ENOMEM;

	GEM_BUG_ON(i915_active_is_idle(ref));
	if (!i915_sw_fence_await(fence)) {
		kfree(wb);
		return -EINVAL;
	}

	wb->base.flags = 0;
	wb->base.func = barrier_wake;
	wb->base.private = fence;
	wb->ref = ref;

	add_wait_queue(__var_waitqueue(ref), &wb->base);
	return 0;
}

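/*
 * Build awaits for the selected subsets of this tracker: the exclusive fence
 * (I915_ACTIVE_AWAIT_EXCL), every per-timeline fence (I915_ACTIVE_AWAIT_ACTIVE)
 * and/or complete idleness via a wait barrier (I915_ACTIVE_AWAIT_BARRIER).
 */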
static int await_active(struct i915_active *ref,
			unsigned int flags,
			int (*fn)(void *arg, struct dma_fence *fence),
			void *arg, struct i915_sw_fence *barrier)
{
	int err = 0;

	if (!i915_active_acquire_if_busy(ref))
		return 0;

	if (flags & I915_ACTIVE_AWAIT_EXCL &&
	    rcu_access_pointer(ref->excl.fence)) {
		err = __await_active(&ref->excl, fn, arg);
		if (err)
			goto out;
	}

	if (flags & I915_ACTIVE_AWAIT_ACTIVE) {
		struct active_node *it, *n;

		rbtree_postorder_for_each_entry_safe(it, n, &ref->tree, node) {
			err = __await_active(&it->base, fn, arg);
			if (err)
				goto out;
		}
	}

	if (flags & I915_ACTIVE_AWAIT_BARRIER) {
		err = flush_lazy_signals(ref);
		if (err)
			goto out;

		err = __await_barrier(ref, barrier);
		if (err)
			goto out;
	}

out:
	i915_active_release(ref);
	return err;
}

static int rq_await_fence(void *arg, struct dma_fence *fence)
{
	return i915_request_await_dma_fence(arg, fence);
}

int i915_request_await_active(struct i915_request *rq,
			      struct i915_active *ref,
			      unsigned int flags)
{
	return await_active(ref, flags, rq_await_fence, rq, &rq->submit);
}

static int sw_await_fence(void *arg, struct dma_fence *fence)
{
	return i915_sw_fence_await_dma_fence(arg, fence, 0,
					     GFP_NOWAIT | __GFP_NOWARN);
}

int i915_sw_fence_await_active(struct i915_sw_fence *fence,
			       struct i915_active *ref,
			       unsigned int flags)
{
	return await_active(ref, flags, sw_await_fence, fence, fence);
}

#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)
void i915_active_fini(struct i915_active *ref)
{
	debug_active_fini(ref);
	GEM_BUG_ON(atomic_read(&ref->count));
	GEM_BUG_ON(work_pending(&ref->work));
	GEM_BUG_ON(!RB_EMPTY_ROOT(&ref->tree));
	mutex_destroy(&ref->mutex);
}
#endif

static inline bool is_idle_barrier(struct active_node *node, u64 idx)
{
	return node->timeline == idx && !i915_active_fence_isset(&node->base);
}

static struct active_node *reuse_idle_barrier(struct i915_active *ref, u64 idx)
{
	struct rb_node *prev, *p;

	if (RB_EMPTY_ROOT(&ref->tree))
		return NULL;

	spin_lock_irq(&ref->tree_lock);
	GEM_BUG_ON(i915_active_is_idle(ref));

	/*
	 * Try to reuse any existing barrier nodes already allocated for this
	 * i915_active, due to overlapping active phases there is likely a
	 * node kept alive (as we reuse before parking). We prefer to reuse
	 * completely idle barriers (less hassle in manipulating the llists),
	 * but otherwise any will do.
	 */
	if (ref->cache && is_idle_barrier(ref->cache, idx)) {
		p = &ref->cache->node;
		goto match;
	}

	prev = NULL;
	p = ref->tree.rb_node;
	while (p) {
		struct active_node *node =
			rb_entry(p, struct active_node, node);

		if (is_idle_barrier(node, idx))
			goto match;

		prev = p;
		if (node->timeline < idx)
			p = p->rb_right;
		else
			p = p->rb_left;
	}

	/*
	 * No quick match, but we did find the leftmost rb_node for the
	 * kernel_context. Walk the rb_tree in-order to see if there were
	 * any idle-barriers on this timeline that we missed, or just use
	 * the first pending barrier.
	 */
	for (p = prev; p; p = rb_next(p)) {
		struct active_node *node =
			rb_entry(p, struct active_node, node);
		struct intel_engine_cs *engine;

		if (node->timeline > idx)
			break;

		if (node->timeline < idx)
			continue;

		if (is_idle_barrier(node, idx))
			goto match;

		/*
		 * The list of pending barriers is protected by the
		 * kernel_context timeline, which notably we do not hold
		 * here. i915_request_add_active_barriers() may consume
		 * the barrier before we claim it, so we have to check
		 * for success.
		 */
		engine = __barrier_to_engine(node);
		smp_rmb(); /* serialise with add_active_barriers */
		if (is_barrier(&node->base) &&
		    ____active_del_barrier(ref, node, engine))
			goto match;
	}

	spin_unlock_irq(&ref->tree_lock);

	return NULL;

match:
	rb_erase(p, &ref->tree); /* Hide from waits and sibling allocations */
	if (p == &ref->cache->node)
		ref->cache = NULL;
	spin_unlock_irq(&ref->tree_lock);

	return rb_entry(p, struct active_node, node);
}

int i915_active_acquire_preallocate_barrier(struct i915_active *ref,
					    struct intel_engine_cs *engine)
{
	intel_engine_mask_t tmp, mask = engine->mask;
	struct llist_node *first = NULL, *last = NULL;
	struct intel_gt *gt = engine->gt;
	int err;

	GEM_BUG_ON(i915_active_is_idle(ref));

	/* Wait until the previous preallocation is completed */
	while (!llist_empty(&ref->preallocated_barriers))
		cond_resched();

	/*
	 * Preallocate a node for each physical engine supporting the target
	 * engine (remember virtual engines have more than one sibling).
	 * We can then use the preallocated nodes in
	 * i915_active_acquire_barrier()
	 */
	GEM_BUG_ON(!mask);
	for_each_engine_masked(engine, gt, mask, tmp) {
		u64 idx = engine->kernel_context->timeline->fence_context;
		struct llist_node *prev = first;
		struct active_node *node;

		node = reuse_idle_barrier(ref, idx);
		if (!node) {
			node = kmem_cache_alloc(global.slab_cache, GFP_KERNEL);
			if (!node) {
				err = -ENOMEM;
				goto unwind;
			}

			RCU_INIT_POINTER(node->base.fence, NULL);
			node->base.cb.func = node_retire;
			node->timeline = idx;
			node->ref = ref;
		}

		if (!i915_active_fence_isset(&node->base)) {
			/*
			 * Mark this as being *our* unconnected proto-node.
			 *
			 * Since this node is not in any list, and we have
			 * decoupled it from the rbtree, we can reuse the
			 * request to indicate this is an idle-barrier node
			 * and then we can use the rb_node and list pointers
			 * for our tracking of the pending barrier.
			 */
			RCU_INIT_POINTER(node->base.fence, ERR_PTR(-EAGAIN));
			node->base.cb.node.prev = (void *)engine;
			atomic_inc(&ref->count);
		}
		GEM_BUG_ON(rcu_access_pointer(node->base.fence) != ERR_PTR(-EAGAIN));

		GEM_BUG_ON(barrier_to_engine(node) != engine);
		first = barrier_to_ll(node);
		first->next = prev;
		if (!last)
			last = first;
		intel_engine_pm_get(engine);
	}

	GEM_BUG_ON(!llist_empty(&ref->preallocated_barriers));
	llist_add_batch(first, last, &ref->preallocated_barriers);

	return 0;

unwind:
	while (first) {
		struct active_node *node = barrier_from_ll(first);

		first = first->next;

		atomic_dec(&ref->count);
		intel_engine_pm_put(barrier_to_engine(node));

		kmem_cache_free(global.slab_cache, node);
	}
	return err;
}

void i915_active_acquire_barrier(struct i915_active *ref)
{
	struct llist_node *pos, *next;
	unsigned long flags;

	GEM_BUG_ON(i915_active_is_idle(ref));

	/*
	 * Transfer the list of preallocated barriers into the
	 * i915_active rbtree, but only as proto-nodes. They will be
	 * populated by i915_request_add_active_barriers() to point to the
	 * request that will eventually release them.
	 */
	llist_for_each_safe(pos, next, take_preallocated_barriers(ref)) {
		struct active_node *node = barrier_from_ll(pos);
		struct intel_engine_cs *engine = barrier_to_engine(node);
		struct rb_node **p, *parent;

		spin_lock_irqsave_nested(&ref->tree_lock, flags,
					 SINGLE_DEPTH_NESTING);
		parent = NULL;
		p = &ref->tree.rb_node;
		while (*p) {
			struct active_node *it;

			parent = *p;

			it = rb_entry(parent, struct active_node, node);
			if (it->timeline < node->timeline)
				p = &parent->rb_right;
			else
				p = &parent->rb_left;
		}
		rb_link_node(&node->node, parent, p);
		rb_insert_color(&node->node, &ref->tree);
		spin_unlock_irqrestore(&ref->tree_lock, flags);

		GEM_BUG_ON(!intel_engine_pm_is_awake(engine));
		llist_add(barrier_to_ll(node), &engine->barrier_tasks);
		intel_engine_pm_put_delay(engine, 1);
	}
}

static struct dma_fence **ll_to_fence_slot(struct llist_node *node)
{
	return __active_fence_slot(&barrier_from_ll(node)->base);
}

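/*
 * Splice the engine's pending barrier proto-nodes onto rq's callback list so
 * that each i915_active is released when this (kernel_context) request retires.
 */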
void i915_request_add_active_barriers(struct i915_request *rq)
{
	struct intel_engine_cs *engine = rq->engine;
	struct llist_node *node, *next;
	unsigned long flags;

	GEM_BUG_ON(!intel_context_is_barrier(rq->context));
	GEM_BUG_ON(intel_engine_is_virtual(engine));
	GEM_BUG_ON(i915_request_timeline(rq) != engine->kernel_context->timeline);

	node = llist_del_all(&engine->barrier_tasks);
	if (!node)
		return;
	/*
	 * Attach the list of proto-fences to the in-flight request such
	 * that the parent i915_active will be released when this request
	 * is retired.
	 */
	spin_lock_irqsave(&rq->lock, flags);
	llist_for_each_safe(node, next, node) {
		/* serialise with reuse_idle_barrier */
		smp_store_mb(*ll_to_fence_slot(node), &rq->fence);
		list_add_tail((struct list_head *)node, &rq->fence.cb_list);
	}
	spin_unlock_irqrestore(&rq->lock, flags);
}

/*
 * __i915_active_fence_set: Update the last active fence along its timeline
 * @active: the active tracker
 * @fence: the new fence (under construction)
 *
 * Records the new @fence as the last active fence along its timeline in
 * this active tracker, moving the tracking callbacks from the previous
 * fence onto this one. Returns the previous fence (if not already completed),
 * which the caller must ensure is executed before the new fence. To ensure
 * that the order of fences within the timeline of the i915_active_fence is
 * understood, it should be locked by the caller.
 */
struct dma_fence *
__i915_active_fence_set(struct i915_active_fence *active,
			struct dma_fence *fence)
{
	struct dma_fence *prev;
	unsigned long flags;

	if (fence == rcu_access_pointer(active->fence))
		return fence;

	GEM_BUG_ON(test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags));

	/*
	 * Consider that we have two threads arriving (A and B), with
	 * C already resident as the active->fence.
	 *
	 * A does the xchg first, and so it sees C or NULL depending
	 * on the timing of the interrupt handler. If it is NULL, the
	 * previous fence must have been signaled and we know that
	 * we are first on the timeline. If it is still present,
	 * we acquire the lock on that fence and serialise with the interrupt
	 * handler, in the process removing it from any future interrupt
	 * callback. A will then wait on C before executing (if present).
	 *
	 * As B is second, it sees A as the previous fence and so waits for
	 * it to complete its transition and takes over the occupancy for
	 * itself -- remembering that it needs to wait on A before executing.
	 *
	 * Note the strong ordering of the timeline also provides consistent
	 * nesting rules for the fence->lock; the inner lock is always the
	 * older lock.
	 */
	spin_lock_irqsave(fence->lock, flags);
	prev = xchg(__active_fence_slot(active), fence);
	if (prev) {
		GEM_BUG_ON(prev == fence);
		spin_lock_nested(prev->lock, SINGLE_DEPTH_NESTING);
		__list_del_entry(&active->cb.node);
		spin_unlock(prev->lock); /* serialise with prev->cb_list */
	}
	list_add_tail(&active->cb.node, &fence->cb_list);
	spin_unlock_irqrestore(fence->lock, flags);

	return prev;
}

int i915_active_fence_set(struct i915_active_fence *active,
			  struct i915_request *rq)
{
	struct dma_fence *fence;
	int err = 0;

	/* Must maintain timeline ordering wrt previous active requests */
	rcu_read_lock();
	fence = __i915_active_fence_set(active, &rq->fence);
	if (fence) /* but the previous fence may not belong to that timeline! */
		fence = dma_fence_get_rcu(fence);
	rcu_read_unlock();
	if (fence) {
		err = i915_request_await_dma_fence(rq, fence);
		dma_fence_put(fence);
	}

	return err;
}

void i915_active_noop(struct dma_fence *fence, struct dma_fence_cb *cb)
{
	active_fence_cb(fence, cb);
}

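/*
 * Standalone trackers from i915_active_create() are wrapped in a kref so that
 * the final i915_active_put() (via auto_retire) finalises and frees them.
 */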
struct auto_active {
	struct i915_active base;
	struct kref ref;
};

struct i915_active *i915_active_get(struct i915_active *ref)
{
	struct auto_active *aa = container_of(ref, typeof(*aa), base);

	kref_get(&aa->ref);
	return &aa->base;
}

static void auto_release(struct kref *ref)
{
	struct auto_active *aa = container_of(ref, typeof(*aa), ref);

	i915_active_fini(&aa->base);
	kfree(aa);
}

void i915_active_put(struct i915_active *ref)
{
	struct auto_active *aa = container_of(ref, typeof(*aa), base);

	kref_put(&aa->ref, auto_release);
}

static int auto_active(struct i915_active *ref)
{
	i915_active_get(ref);
	return 0;
}

static void auto_retire(struct i915_active *ref)
{
	i915_active_put(ref);
}

struct i915_active *i915_active_create(void)
{
	struct auto_active *aa;

	aa = kmalloc(sizeof(*aa), GFP_KERNEL);
	if (!aa)
		return NULL;

	kref_init(&aa->ref);
	i915_active_init(&aa->base, auto_active, auto_retire);

	return &aa->base;
}

#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftests/i915_active.c"
#endif

static void i915_global_active_shrink(void)
{
	kmem_cache_shrink(global.slab_cache);
}

static void i915_global_active_exit(void)
{
	kmem_cache_destroy(global.slab_cache);
}

static struct i915_global_active global = { {
	.shrink = i915_global_active_shrink,
	.exit = i915_global_active_exit,
} };

int __init i915_global_active_init(void)
{
	global.slab_cache = KMEM_CACHE(active_node, SLAB_HWCACHE_ALIGN);
	if (!global.slab_cache)
		return -ENOMEM;

	i915_global_register(&global.base);
	return 0;
}