/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright © 2019 Intel Corporation
 */

#include <linux/debugobjects.h>

#include "gt/intel_engine_pm.h"

#include "i915_drv.h"
#include "i915_active.h"
#include "i915_globals.h"

/*
 * Active refs memory management
 *
 * To be more economical with memory, we reap all the i915_active trees as
 * they idle (when we know the active requests are inactive) and allocate the
 * nodes from a local slab cache to hopefully reduce the fragmentation.
 */
static struct i915_global_active {
	struct i915_global base;
	struct kmem_cache *slab_cache;
} global;

struct active_node {
	struct i915_active_fence base;
	struct i915_active *ref;
	struct rb_node node;
	u64 timeline;
};

static inline struct active_node *
node_from_active(struct i915_active_fence *active)
{
	return container_of(active, struct active_node, base);
}

#define take_preallocated_barriers(x) llist_del_all(&(x)->preallocated_barriers)

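/*
 * Barrier "proto-nodes" are active_nodes earmarked for an engine's idle
 * barrier but not yet attached to a request. They are recognised by an
 * ERR_PTR stored in place of the fence pointer, the dma_fence_cb list entry
 * doubles as the llist link used to queue them on engine->barrier_tasks,
 * and the otherwise unused cb.node.prev pointer stashes the target engine.
 * The helpers below encode/decode that layout.
 */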
static inline bool is_barrier(const struct i915_active_fence *active)
{
	return IS_ERR(rcu_access_pointer(active->fence));
}

static inline struct llist_node *barrier_to_ll(struct active_node *node)
{
	GEM_BUG_ON(!is_barrier(&node->base));
	return (struct llist_node *)&node->base.cb.node;
}

static inline struct intel_engine_cs *
__barrier_to_engine(struct active_node *node)
{
	return (struct intel_engine_cs *)READ_ONCE(node->base.cb.node.prev);
}

static inline struct intel_engine_cs *
barrier_to_engine(struct active_node *node)
{
	GEM_BUG_ON(!is_barrier(&node->base));
	return __barrier_to_engine(node);
}

static inline struct active_node *barrier_from_ll(struct llist_node *x)
{
	return container_of((struct list_head *)x,
			    struct active_node, base.cb.node);
}

#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM) && IS_ENABLED(CONFIG_DEBUG_OBJECTS)

static void *active_debug_hint(void *addr)
{
	struct i915_active *ref = addr;

	return (void *)ref->active ?: (void *)ref->retire ?: (void *)ref;
}

static struct debug_obj_descr active_debug_desc = {
	.name = "i915_active",
	.debug_hint = active_debug_hint,
};

static void debug_active_init(struct i915_active *ref)
{
	debug_object_init(ref, &active_debug_desc);
}

static void debug_active_activate(struct i915_active *ref)
{
	lockdep_assert_held(&ref->mutex);
	if (!atomic_read(&ref->count)) /* before the first inc */
		debug_object_activate(ref, &active_debug_desc);
}

static void debug_active_deactivate(struct i915_active *ref)
{
	lockdep_assert_held(&ref->mutex);
	if (!atomic_read(&ref->count)) /* after the last dec */
		debug_object_deactivate(ref, &active_debug_desc);
}

static void debug_active_fini(struct i915_active *ref)
{
	debug_object_free(ref, &active_debug_desc);
}

static void debug_active_assert(struct i915_active *ref)
{
	debug_object_assert_init(ref, &active_debug_desc);
}

#else

static inline void debug_active_init(struct i915_active *ref) { }
static inline void debug_active_activate(struct i915_active *ref) { }
static inline void debug_active_deactivate(struct i915_active *ref) { }
static inline void debug_active_fini(struct i915_active *ref) { }
static inline void debug_active_assert(struct i915_active *ref) { }

#endif

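/*
 * Called once the last reference is dropped (with ref->mutex held): return
 * every tracked node to the slab cache, then run the retire callback and
 * wake anyone sleeping in i915_active_wait().
 */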
static void
__active_retire(struct i915_active *ref)
{
	struct active_node *it, *n;
	struct rb_root root;
	bool retire = false;

	lockdep_assert_held(&ref->mutex);
	GEM_BUG_ON(i915_active_is_idle(ref));

	/* return the unused nodes to our slabcache -- flushing the allocator */
	if (atomic_dec_and_test(&ref->count)) {
		debug_active_deactivate(ref);
		root = ref->tree;
		ref->tree = RB_ROOT;
		ref->cache = NULL;
		retire = true;
	}

	mutex_unlock(&ref->mutex);
	if (!retire)
		return;

	GEM_BUG_ON(rcu_access_pointer(ref->excl.fence));
	rbtree_postorder_for_each_entry_safe(it, n, &root, node) {
		GEM_BUG_ON(i915_active_fence_isset(&it->base));
		kmem_cache_free(global.slab_cache, it);
	}

	/* After the final retire, the entire struct may be freed */
	if (ref->retire)
		ref->retire(ref);

	/* ... except if you wait on it, you must manage your own references! */
	wake_up_var(ref);
}

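/*
 * Deferred retirement: active_retire() punts here (via system_unbound_wq)
 * when it cannot take ref->mutex directly, e.g. when the final reference is
 * dropped from a fence callback in irq context or the retire callback may
 * sleep.
 */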
static void
active_work(struct work_struct *wrk)
{
	struct i915_active *ref = container_of(wrk, typeof(*ref), work);

	GEM_BUG_ON(!atomic_read(&ref->count));
	if (atomic_add_unless(&ref->count, -1, 1))
		return;

	mutex_lock(&ref->mutex);
	__active_retire(ref);
}

static void
active_retire(struct i915_active *ref)
{
	GEM_BUG_ON(!atomic_read(&ref->count));
	if (atomic_add_unless(&ref->count, -1, 1))
		return;

	/* If we are inside interrupt context (fence signaling), defer */
	if (ref->flags & I915_ACTIVE_RETIRE_SLEEPS ||
	    !mutex_trylock(&ref->mutex)) {
		queue_work(system_unbound_wq, &ref->work);
		return;
	}

	__active_retire(ref);
}

static void
node_retire(struct dma_fence *fence, struct dma_fence_cb *cb)
{
	i915_active_fence_cb(fence, cb);
	active_retire(container_of(cb, struct active_node, base.cb)->ref);
}

static void
excl_retire(struct dma_fence *fence, struct dma_fence_cb *cb)
{
	i915_active_fence_cb(fence, cb);
	active_retire(container_of(cb, struct i915_active, excl.cb));
}

static struct i915_active_fence *
active_instance(struct i915_active *ref, struct intel_timeline *tl)
{
	struct active_node *node, *prealloc;
	struct rb_node **p, *parent;
	u64 idx = tl->fence_context;

	/*
	 * We track the most recently used timeline to skip a rbtree search
	 * for the common case, under typical loads we never need the rbtree
	 * at all. We can reuse the last slot if it is empty, that is
	 * after the previous activity has been retired, or if it matches the
	 * current timeline.
	 */
	node = READ_ONCE(ref->cache);
	if (node && node->timeline == idx)
		return &node->base;

	/* Preallocate a replacement, just in case */
	prealloc = kmem_cache_alloc(global.slab_cache, GFP_KERNEL);
	if (!prealloc)
		return NULL;

	mutex_lock(&ref->mutex);
	GEM_BUG_ON(i915_active_is_idle(ref));

	parent = NULL;
	p = &ref->tree.rb_node;
	while (*p) {
		parent = *p;

		node = rb_entry(parent, struct active_node, node);
		if (node->timeline == idx) {
			kmem_cache_free(global.slab_cache, prealloc);
			goto out;
		}

		if (node->timeline < idx)
			p = &parent->rb_right;
		else
			p = &parent->rb_left;
	}

	node = prealloc;
	__i915_active_fence_init(&node->base, &tl->mutex, NULL, node_retire);
	node->ref = ref;
	node->timeline = idx;

	rb_link_node(&node->node, parent, p);
	rb_insert_color(&node->node, &ref->tree);

out:
	ref->cache = node;
	mutex_unlock(&ref->mutex);

	BUILD_BUG_ON(offsetof(typeof(*node), base));
	return &node->base;
}

void __i915_active_init(struct i915_active *ref,
			int (*active)(struct i915_active *ref),
			void (*retire)(struct i915_active *ref),
			struct lock_class_key *key)
{
	unsigned long bits;

	debug_active_init(ref);

	ref->flags = 0;
	ref->active = active;
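	/*
	 * The low bits of the retire callback pointer may carry
	 * I915_ACTIVE_MAY_SLEEP (packed by the caller); unpack it into a
	 * flag so retirement can be deferred to process context when the
	 * callback is not safe to run from a fence signal.
	 */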
	ref->retire = ptr_unpack_bits(retire, &bits, 2);
	if (bits & I915_ACTIVE_MAY_SLEEP)
		ref->flags |= I915_ACTIVE_RETIRE_SLEEPS;

	ref->tree = RB_ROOT;
	ref->cache = NULL;
	init_llist_head(&ref->preallocated_barriers);
	atomic_set(&ref->count, 0);
	__mutex_init(&ref->mutex, "i915_active", key);
	__i915_active_fence_init(&ref->excl, &ref->mutex, NULL, excl_retire);
	INIT_WORK(&ref->work, active_work);
}

static bool ____active_del_barrier(struct i915_active *ref,
				   struct active_node *node,
				   struct intel_engine_cs *engine)
{
	struct llist_node *head = NULL, *tail = NULL;
	struct llist_node *pos, *next;

	GEM_BUG_ON(node->timeline != engine->kernel_context->timeline->fence_context);

	/*
	 * Rebuild the llist excluding our node. We may perform this
	 * outside of the kernel_context timeline mutex and so someone
	 * else may be manipulating the engine->barrier_tasks, in
	 * which case either we or they will be upset :)
	 *
	 * A second __active_del_barrier() will report failure to claim
	 * the active_node and the caller will just shrug and know not to
	 * claim ownership of its node.
	 *
	 * A concurrent i915_request_add_active_barriers() will miss adding
	 * any of the tasks, but we will try again on the next -- and since
	 * we are actively using the barrier, we know that there will be
	 * at least another opportunity when we idle.
	 */
	llist_for_each_safe(pos, next, llist_del_all(&engine->barrier_tasks)) {
		if (node == barrier_from_ll(pos)) {
			node = NULL;
			continue;
		}

		pos->next = head;
		head = pos;
		if (!tail)
			tail = pos;
	}
	if (head)
		llist_add_batch(head, tail, &engine->barrier_tasks);

	return !node;
}

static bool
__active_del_barrier(struct i915_active *ref, struct active_node *node)
{
	return ____active_del_barrier(ref, node, barrier_to_engine(node));
}

int i915_active_ref(struct i915_active *ref,
		    struct intel_timeline *tl,
		    struct dma_fence *fence)
{
	struct i915_active_fence *active;
	int err;

	lockdep_assert_held(&tl->mutex);

	/* Prevent reaping in case we malloc/wait while building the tree */
	err = i915_active_acquire(ref);
	if (err)
		return err;

	active = active_instance(ref, tl);
	if (!active) {
		err = -ENOMEM;
		goto out;
	}

	if (is_barrier(active)) { /* proto-node used by our idle barrier */
		/*
		 * This request is on the kernel_context timeline, and so
		 * we can use it to substitute for the pending idle-barrier
		 * request that we want to emit on the kernel_context.
		 */
		__active_del_barrier(ref, node_from_active(active));
		RCU_INIT_POINTER(active->fence, NULL);
		atomic_dec(&ref->count);
	}
	if (!__i915_active_fence_set(active, fence))
		atomic_inc(&ref->count);

out:
	i915_active_release(ref);
	return err;
}

void i915_active_set_exclusive(struct i915_active *ref, struct dma_fence *f)
{
	/* We expect the caller to manage the exclusive timeline ordering */
	GEM_BUG_ON(i915_active_is_idle(ref));

	/*
	 * As we don't know which mutex the caller is using, we told a small
	 * lie to the debug code that it is using the i915_active.mutex;
	 * and now we must stick to that lie.
	 */
	mutex_acquire(&ref->mutex.dep_map, 0, 0, _THIS_IP_);
	if (!__i915_active_fence_set(&ref->excl, f))
		atomic_inc(&ref->count);
	mutex_release(&ref->mutex.dep_map, 0, _THIS_IP_);
}

bool i915_active_acquire_if_busy(struct i915_active *ref)
{
	debug_active_assert(ref);
	return atomic_add_unless(&ref->count, 1, 0);
}

int i915_active_acquire(struct i915_active *ref)
{
	int err;

	if (i915_active_acquire_if_busy(ref))
		return 0;

	err = mutex_lock_interruptible(&ref->mutex);
	if (err)
		return err;

	if (!atomic_read(&ref->count) && ref->active)
		err = ref->active(ref);
	if (!err) {
		debug_active_activate(ref);
		atomic_inc(&ref->count);
	}

	mutex_unlock(&ref->mutex);

	return err;
}

void i915_active_release(struct i915_active *ref)
{
	debug_active_assert(ref);
	active_retire(ref);
}

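/*
 * Waiting depends on every tracked fence actually signaling; as fences may
 * use lazy (deferred) signaling, explicitly switch them to signaling mode
 * before we sleep on the tracker becoming idle.
 */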
static void enable_signaling(struct i915_active_fence *active)
{
	struct dma_fence *fence;

	fence = i915_active_fence_get(active);
	if (!fence)
		return;

	dma_fence_enable_sw_signaling(fence);
	dma_fence_put(fence);
}

int i915_active_wait(struct i915_active *ref)
{
	struct active_node *it, *n;
	int err = 0;

	might_sleep();

	if (!i915_active_acquire_if_busy(ref))
		return 0;

	/* Flush lazy signals */
	enable_signaling(&ref->excl);
	rbtree_postorder_for_each_entry_safe(it, n, &ref->tree, node) {
		if (is_barrier(&it->base)) /* unconnected idle barrier */
			continue;

		enable_signaling(&it->base);
	}
	/* Any fence added after the wait begins will not be auto-signaled */

	i915_active_release(ref);
	if (err)
		return err;

	if (wait_var_event_interruptible(ref, i915_active_is_idle(ref)))
		return -EINTR;

	return 0;
}

int i915_request_await_active(struct i915_request *rq, struct i915_active *ref)
{
	int err = 0;

	if (rcu_access_pointer(ref->excl.fence)) {
		struct dma_fence *fence;

		rcu_read_lock();
		fence = dma_fence_get_rcu_safe(&ref->excl.fence);
		rcu_read_unlock();
		if (fence) {
			err = i915_request_await_dma_fence(rq, fence);
			dma_fence_put(fence);
		}
	}

	/* In the future we may choose to await on all fences */

	return err;
}

#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)
void i915_active_fini(struct i915_active *ref)
{
	debug_active_fini(ref);
	GEM_BUG_ON(atomic_read(&ref->count));
	GEM_BUG_ON(work_pending(&ref->work));
	GEM_BUG_ON(!RB_EMPTY_ROOT(&ref->tree));
	mutex_destroy(&ref->mutex);
}
#endif

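/*
 * An idle barrier is a node allocated for an engine's kernel_context
 * timeline that is not currently tracking any fence; such nodes can be
 * plucked back out of the tree and recycled as the next barrier.
 */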
static inline bool is_idle_barrier(struct active_node *node, u64 idx)
{
	return node->timeline == idx && !i915_active_fence_isset(&node->base);
}

static struct active_node *reuse_idle_barrier(struct i915_active *ref, u64 idx)
{
	struct rb_node *prev, *p;

	if (RB_EMPTY_ROOT(&ref->tree))
		return NULL;

	mutex_lock(&ref->mutex);
	GEM_BUG_ON(i915_active_is_idle(ref));

	/*
	 * Try to reuse any existing barrier nodes already allocated for this
	 * i915_active, due to overlapping active phases there is likely a
	 * node kept alive (as we reuse before parking). We prefer to reuse
	 * completely idle barriers (less hassle in manipulating the llists),
	 * but otherwise any will do.
	 */
	if (ref->cache && is_idle_barrier(ref->cache, idx)) {
		p = &ref->cache->node;
		goto match;
	}

	prev = NULL;
	p = ref->tree.rb_node;
	while (p) {
		struct active_node *node =
			rb_entry(p, struct active_node, node);

		if (is_idle_barrier(node, idx))
			goto match;

		prev = p;
		if (node->timeline < idx)
			p = p->rb_right;
		else
			p = p->rb_left;
	}

	/*
	 * No quick match, but we did find the leftmost rb_node for the
	 * kernel_context. Walk the rb_tree in-order to see if there were
	 * any idle-barriers on this timeline that we missed, or just use
	 * the first pending barrier.
	 */
	for (p = prev; p; p = rb_next(p)) {
		struct active_node *node =
			rb_entry(p, struct active_node, node);
		struct intel_engine_cs *engine;

		if (node->timeline > idx)
			break;

		if (node->timeline < idx)
			continue;

		if (is_idle_barrier(node, idx))
			goto match;

		/*
		 * The list of pending barriers is protected by the
		 * kernel_context timeline, which notably we do not hold
		 * here. i915_request_add_active_barriers() may consume
		 * the barrier before we claim it, so we have to check
		 * for success.
		 */
		engine = __barrier_to_engine(node);
		smp_rmb(); /* serialise with add_active_barriers */
		if (is_barrier(&node->base) &&
		    ____active_del_barrier(ref, node, engine))
			goto match;
	}

	mutex_unlock(&ref->mutex);

	return NULL;

match:
	rb_erase(p, &ref->tree); /* Hide from waits and sibling allocations */
	if (p == &ref->cache->node)
		ref->cache = NULL;
	mutex_unlock(&ref->mutex);

	return rb_entry(p, struct active_node, node);
}

int i915_active_acquire_preallocate_barrier(struct i915_active *ref,
					    struct intel_engine_cs *engine)
{
	intel_engine_mask_t tmp, mask = engine->mask;
	struct intel_gt *gt = engine->gt;
	struct llist_node *pos, *next;
	int err;

	GEM_BUG_ON(!llist_empty(&ref->preallocated_barriers));

	/*
	 * Preallocate a node for each physical engine supporting the target
	 * engine (remember virtual engines have more than one sibling).
	 * We can then use the preallocated nodes in
	 * i915_active_acquire_barrier()
	 */
	for_each_engine_masked(engine, gt, mask, tmp) {
		u64 idx = engine->kernel_context->timeline->fence_context;
		struct active_node *node;

		node = reuse_idle_barrier(ref, idx);
		if (!node) {
			node = kmem_cache_alloc(global.slab_cache, GFP_KERNEL);
			if (!node) {
				err = -ENOMEM;
				goto unwind;
			}

#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)
			node->base.lock =
				&engine->kernel_context->timeline->mutex;
#endif
			RCU_INIT_POINTER(node->base.fence, NULL);
			node->base.cb.func = node_retire;
			node->timeline = idx;
			node->ref = ref;
		}

		if (!i915_active_fence_isset(&node->base)) {
			/*
			 * Mark this as being *our* unconnected proto-node.
			 *
			 * Since this node is not in any list, and we have
			 * decoupled it from the rbtree, we can reuse the
			 * request to indicate this is an idle-barrier node
			 * and then we can use the rb_node and list pointers
			 * for our tracking of the pending barrier.
			 */
			RCU_INIT_POINTER(node->base.fence, ERR_PTR(-EAGAIN));
			node->base.cb.node.prev = (void *)engine;
			atomic_inc(&ref->count);
		}

		GEM_BUG_ON(barrier_to_engine(node) != engine);
		llist_add(barrier_to_ll(node), &ref->preallocated_barriers);
		intel_engine_pm_get(engine);
	}

	return 0;

unwind:
	llist_for_each_safe(pos, next, take_preallocated_barriers(ref)) {
		struct active_node *node = barrier_from_ll(pos);

		atomic_dec(&ref->count);
		intel_engine_pm_put(barrier_to_engine(node));

		kmem_cache_free(global.slab_cache, node);
	}
	return err;
}

void i915_active_acquire_barrier(struct i915_active *ref)
{
	struct llist_node *pos, *next;

	GEM_BUG_ON(i915_active_is_idle(ref));

	/*
	 * Transfer the list of preallocated barriers into the
	 * i915_active rbtree, but only as proto-nodes. They will be
	 * populated by i915_request_add_active_barriers() to point to the
	 * request that will eventually release them.
	 */
	mutex_lock_nested(&ref->mutex, SINGLE_DEPTH_NESTING);
	llist_for_each_safe(pos, next, take_preallocated_barriers(ref)) {
		struct active_node *node = barrier_from_ll(pos);
		struct intel_engine_cs *engine = barrier_to_engine(node);
		struct rb_node **p, *parent;

		parent = NULL;
		p = &ref->tree.rb_node;
		while (*p) {
			struct active_node *it;

			parent = *p;

			it = rb_entry(parent, struct active_node, node);
			if (it->timeline < node->timeline)
				p = &parent->rb_right;
			else
				p = &parent->rb_left;
		}
		rb_link_node(&node->node, parent, p);
		rb_insert_color(&node->node, &ref->tree);

		GEM_BUG_ON(!intel_engine_pm_is_awake(engine));
		llist_add(barrier_to_ll(node), &engine->barrier_tasks);
		intel_engine_pm_put(engine);
	}
	mutex_unlock(&ref->mutex);
}

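/*
 * Called when emitting a request along the engine's kernel_context timeline:
 * every barrier task queued on the engine is attached to this request's
 * callback list, so retiring the request releases the parent i915_active.
 */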
void i915_request_add_active_barriers(struct i915_request *rq)
{
	struct intel_engine_cs *engine = rq->engine;
	struct llist_node *node, *next;
	unsigned long flags;

	GEM_BUG_ON(intel_engine_is_virtual(engine));
	GEM_BUG_ON(i915_request_timeline(rq) != engine->kernel_context->timeline);

	node = llist_del_all(&engine->barrier_tasks);
	if (!node)
		return;
	/*
	 * Attach the list of proto-fences to the in-flight request such
	 * that the parent i915_active will be released when this request
	 * is retired.
	 */
	spin_lock_irqsave(&rq->lock, flags);
	llist_for_each_safe(node, next, node) {
		RCU_INIT_POINTER(barrier_from_ll(node)->base.fence, &rq->fence);
		smp_wmb(); /* serialise with reuse_idle_barrier */
		list_add_tail((struct list_head *)node, &rq->fence.cb_list);
	}
	spin_unlock_irqrestore(&rq->lock, flags);
}

#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)
#define active_is_held(active) lockdep_is_held((active)->lock)
#else
#define active_is_held(active) true
#endif

/*
 * __i915_active_fence_set: Update the last active fence along its timeline
 * @active: the active tracker
 * @fence: the new fence (under construction)
 *
 * Records the new @fence as the last active fence along its timeline in
 * this active tracker, moving the tracking callbacks from the previous
 * fence onto this one. Returns the previous fence (if not already completed),
 * which the caller must ensure is executed before the new fence. To ensure
 * that the order of fences within the timeline of the i915_active_fence is
 * maintained, it must be locked by the caller.
 */
struct dma_fence *
__i915_active_fence_set(struct i915_active_fence *active,
			struct dma_fence *fence)
{
	struct dma_fence *prev;
	unsigned long flags;

	/* NB: must be serialised by an outer timeline mutex (active->lock) */
	spin_lock_irqsave(fence->lock, flags);
	GEM_BUG_ON(test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags));

	prev = rcu_dereference_protected(active->fence, active_is_held(active));
	if (prev) {
		GEM_BUG_ON(prev == fence);
		spin_lock_nested(prev->lock, SINGLE_DEPTH_NESTING);
		__list_del_entry(&active->cb.node);
		spin_unlock(prev->lock); /* serialise with prev->cb_list */

		/*
		 * active->fence is reset by the callback from inside
		 * interrupt context. We need to serialise our list
		 * manipulation with the fence->lock to prevent the prev
		 * being lost inside an interrupt (it can't be replaced as
		 * no other caller is allowed to enter __i915_active_fence_set
		 * as we hold the timeline lock). After serialising with
		 * the callback, we need to double check which ran first,
		 * our list_del() [decoupling prev from the callback] or
		 * the callback...
		 */
		prev = rcu_access_pointer(active->fence);
	}

	rcu_assign_pointer(active->fence, fence);
	list_add_tail(&active->cb.node, &fence->cb_list);

	spin_unlock_irqrestore(fence->lock, flags);

	return prev;
}

int i915_active_fence_set(struct i915_active_fence *active,
			  struct i915_request *rq)
{
	struct dma_fence *fence;
	int err = 0;

#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)
	lockdep_assert_held(active->lock);
#endif

	/* Must maintain timeline ordering wrt previous active requests */
	rcu_read_lock();
	fence = __i915_active_fence_set(active, &rq->fence);
	if (fence) /* but the previous fence may not belong to that timeline! */
		fence = dma_fence_get_rcu(fence);
	rcu_read_unlock();
	if (fence) {
		err = i915_request_await_dma_fence(rq, fence);
		dma_fence_put(fence);
	}

	return err;
}

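/*
 * No-op signal callback: just clear the tracked fence pointer via
 * i915_active_fence_cb(), with no further retirement side effects.
 */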
void i915_active_noop(struct dma_fence *fence, struct dma_fence_cb *cb)
{
	i915_active_fence_cb(fence, cb);
}

#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftests/i915_active.c"
#endif

static void i915_global_active_shrink(void)
{
	kmem_cache_shrink(global.slab_cache);
}

static void i915_global_active_exit(void)
{
	kmem_cache_destroy(global.slab_cache);
}

static struct i915_global_active global = { {
	.shrink = i915_global_active_shrink,
	.exit = i915_global_active_exit,
} };

int __init i915_global_active_init(void)
{
	global.slab_cache = KMEM_CACHE(active_node, SLAB_HWCACHE_ALIGN);
	if (!global.slab_cache)
		return -ENOMEM;

	i915_global_register(&global.base);
	return 0;
}