Commit | Line | Data |
---|---|---|
64d6c500 CW |
1 | /* |
2 | * SPDX-License-Identifier: MIT | |
3 | * | |
4 | * Copyright © 2019 Intel Corporation | |
5 | */ | |
6 | ||
5361db1a CW |
7 | #include <linux/debugobjects.h> |
8 | ||
7009db14 CW |
9 | #include "gt/intel_engine_pm.h" |
10 | ||
64d6c500 CW |
11 | #include "i915_drv.h" |
12 | #include "i915_active.h" | |
103b76ee | 13 | #include "i915_globals.h" |
64d6c500 CW |
14 | |
15 | #define BKL(ref) (&(ref)->i915->drm.struct_mutex) | |
16 | ||
5f5c139d CW |
17 | /* |
18 | * Active refs memory management | |
19 | * | |
20 | * To be more economical with memory, we reap all the i915_active trees as | |
21 | * they idle (once we know that all tracked requests have been retired) and | |
22 | * allocate the nodes from a local slab cache to reduce fragmentation. | |
23 | */ | |
24 | static struct i915_global_active { | |
103b76ee | 25 | struct i915_global base; |
5f5c139d CW |
26 | struct kmem_cache *slab_cache; |
27 | } global; | |
28 | ||
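As a hedged illustration of the lifecycle this file implements (a sketch only, not code from the driver: the embedding struct is hypothetical and the i915_active_init() convenience macro is assumed to wrap __i915_active_init() as declared in i915_active.h):

struct sketch_obj {
        struct drm_i915_private *i915;
        struct i915_active active;      /* hypothetical embedding */
};

static void sketch_obj_retire(struct i915_active *ref)
{
        /* last tracked request retired: the node tree has been reaped */
}

static int sketch_obj_track(struct sketch_obj *obj,
                            struct intel_timeline *tl,
                            struct i915_request *rq)
{
        int err;

        i915_active_init(obj->i915, &obj->active, NULL, sketch_obj_retire);

        /* record rq as the latest activity on tl; tl->mutex must be held */
        err = i915_active_ref(&obj->active, tl, rq);
        if (err)
                return err;

        /* later: block until every tracked request has been retired */
        return i915_active_wait(&obj->active);
}

Each i915_active_ref() on an empty slot takes a reference on the tree; once the last tracked request retires, __active_retire() below returns the nodes to the slab cache and invokes the retire() callback.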
64d6c500 | 29 | struct active_node { |
21950ee7 | 30 | struct i915_active_request base; |
64d6c500 CW |
31 | struct i915_active *ref; |
32 | struct rb_node node; | |
33 | u64 timeline; | |
34 | }; | |
35 | ||
d8af05ff CW |
36 | static inline struct active_node * |
37 | node_from_active(struct i915_active_request *active) | |
38 | { | |
39 | return container_of(active, struct active_node, base); | |
40 | } | |
41 | ||
42 | #define take_preallocated_barriers(x) llist_del_all(&(x)->preallocated_barriers) | |
43 | ||
44 | static inline bool is_barrier(const struct i915_active_request *active) | |
45 | { | |
46 | return IS_ERR(rcu_access_pointer(active->request)); | |
47 | } | |
48 | ||
49 | static inline struct llist_node *barrier_to_ll(struct active_node *node) | |
50 | { | |
51 | GEM_BUG_ON(!is_barrier(&node->base)); | |
52 | return (struct llist_node *)&node->base.link; | |
53 | } | |
54 | ||
f130b712 CW |
55 | static inline struct intel_engine_cs * |
56 | __barrier_to_engine(struct active_node *node) | |
57 | { | |
58 | return (struct intel_engine_cs *)READ_ONCE(node->base.link.prev); | |
59 | } | |
60 | ||
d8af05ff CW |
61 | static inline struct intel_engine_cs * |
62 | barrier_to_engine(struct active_node *node) | |
63 | { | |
64 | GEM_BUG_ON(!is_barrier(&node->base)); | |
f130b712 | 65 | return __barrier_to_engine(node); |
d8af05ff CW |
66 | } |
67 | ||
68 | static inline struct active_node *barrier_from_ll(struct llist_node *x) | |
69 | { | |
70 | return container_of((struct list_head *)x, | |
71 | struct active_node, base.link); | |
72 | } | |
73 | ||
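The helpers above recover state that the barrier-preallocation path stashes in otherwise-unused fields of the embedded i915_active_request. A minimal sketch of that encoding, mirroring what i915_active_acquire_preallocate_barrier() does further down (illustrative only, not part of the file):

static void sketch_mark_as_barrier(struct active_node *node,
                                   struct intel_engine_cs *engine)
{
        /* an ERR_PTR request is what is_barrier() tests for */
        RCU_INIT_POINTER(node->base.request, ERR_PTR(-EAGAIN));

        /*
         * The otherwise-unused link.prev slot carries the owning engine
         * (recovered by __barrier_to_engine()), while &node->base.link itself
         * doubles as the llist_node threaded onto ref->preallocated_barriers
         * or engine->barrier_tasks (barrier_to_ll()/barrier_from_ll()).
         */
        node->base.link.prev = (void *)engine;
}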
5361db1a CW |
74 | #if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM) && IS_ENABLED(CONFIG_DEBUG_OBJECTS) |
75 | ||
76 | static void *active_debug_hint(void *addr) | |
77 | { | |
78 | struct i915_active *ref = addr; | |
79 | ||
12c255b5 | 80 | return (void *)ref->active ?: (void *)ref->retire ?: (void *)ref; |
5361db1a CW |
81 | } |
82 | ||
83 | static struct debug_obj_descr active_debug_desc = { | |
84 | .name = "i915_active", | |
85 | .debug_hint = active_debug_hint, | |
86 | }; | |
87 | ||
88 | static void debug_active_init(struct i915_active *ref) | |
89 | { | |
90 | debug_object_init(ref, &active_debug_desc); | |
91 | } | |
92 | ||
93 | static void debug_active_activate(struct i915_active *ref) | |
94 | { | |
95 | debug_object_activate(ref, &active_debug_desc); | |
96 | } | |
97 | ||
98 | static void debug_active_deactivate(struct i915_active *ref) | |
99 | { | |
100 | debug_object_deactivate(ref, &active_debug_desc); | |
101 | } | |
102 | ||
103 | static void debug_active_fini(struct i915_active *ref) | |
104 | { | |
105 | debug_object_free(ref, &active_debug_desc); | |
106 | } | |
107 | ||
108 | static void debug_active_assert(struct i915_active *ref) | |
109 | { | |
110 | debug_object_assert_init(ref, &active_debug_desc); | |
111 | } | |
112 | ||
113 | #else | |
114 | ||
115 | static inline void debug_active_init(struct i915_active *ref) { } | |
116 | static inline void debug_active_activate(struct i915_active *ref) { } | |
117 | static inline void debug_active_deactivate(struct i915_active *ref) { } | |
118 | static inline void debug_active_fini(struct i915_active *ref) { } | |
119 | static inline void debug_active_assert(struct i915_active *ref) { } | |
120 | ||
121 | #endif | |
122 | ||
a42375af | 123 | static void |
12c255b5 | 124 | __active_retire(struct i915_active *ref) |
a42375af CW |
125 | { |
126 | struct active_node *it, *n; | |
12c255b5 CW |
127 | struct rb_root root; |
128 | bool retire = false; | |
129 | ||
130 | lockdep_assert_held(&ref->mutex); | |
131 | ||
132 | /* return the unused nodes to our slabcache -- flushing the allocator */ | |
133 | if (atomic_dec_and_test(&ref->count)) { | |
134 | debug_active_deactivate(ref); | |
135 | root = ref->tree; | |
136 | ref->tree = RB_ROOT; | |
137 | ref->cache = NULL; | |
138 | retire = true; | |
139 | } | |
a42375af | 140 | |
12c255b5 CW |
141 | mutex_unlock(&ref->mutex); |
142 | if (!retire) | |
143 | return; | |
144 | ||
12c255b5 | 145 | rbtree_postorder_for_each_entry_safe(it, n, &root, node) { |
21950ee7 | 146 | GEM_BUG_ON(i915_active_request_isset(&it->base)); |
5f5c139d | 147 | kmem_cache_free(global.slab_cache, it); |
a42375af | 148 | } |
e1d7b66b CW |
149 | |
150 | /* After the final retire, the entire struct may be freed */ | |
151 | if (ref->retire) | |
152 | ref->retire(ref); | |
a42375af CW |
153 | } |
154 | ||
64d6c500 | 155 | static void |
12c255b5 | 156 | active_retire(struct i915_active *ref) |
64d6c500 | 157 | { |
12c255b5 CW |
158 | GEM_BUG_ON(!atomic_read(&ref->count)); |
159 | if (atomic_add_unless(&ref->count, -1, 1)) | |
a42375af CW |
160 | return; |
161 | ||
12c255b5 CW |
162 | /* One active may be flushed from inside the acquire of another */ |
163 | mutex_lock_nested(&ref->mutex, SINGLE_DEPTH_NESTING); | |
164 | __active_retire(ref); | |
64d6c500 CW |
165 | } |
166 | ||
167 | static void | |
21950ee7 | 168 | node_retire(struct i915_active_request *base, struct i915_request *rq) |
64d6c500 | 169 | { |
d8af05ff | 170 | active_retire(node_from_active(base)->ref); |
64d6c500 CW |
171 | } |
172 | ||
21950ee7 | 173 | static struct i915_active_request * |
25ffd4b1 | 174 | active_instance(struct i915_active *ref, struct intel_timeline *tl) |
64d6c500 | 175 | { |
12c255b5 | 176 | struct active_node *node, *prealloc; |
64d6c500 | 177 | struct rb_node **p, *parent; |
25ffd4b1 | 178 | u64 idx = tl->fence_context; |
64d6c500 CW |
179 | |
180 | /* | |
181 | * We track the most recently used timeline to skip an rbtree search
182 | * for the common case; under typical loads we never need the rbtree
183 | * at all. We can reuse the last slot if it is empty, that is | |
184 | * after the previous activity has been retired, or if it matches the | |
185 | * current timeline. | |
64d6c500 | 186 | */ |
12c255b5 CW |
187 | node = READ_ONCE(ref->cache); |
188 | if (node && node->timeline == idx) | |
189 | return &node->base; | |
190 | ||
191 | /* Preallocate a replacement, just in case */ | |
192 | prealloc = kmem_cache_alloc(global.slab_cache, GFP_KERNEL); | |
193 | if (!prealloc) | |
194 | return NULL; | |
64d6c500 | 195 | |
12c255b5 CW |
196 | mutex_lock(&ref->mutex); |
197 | GEM_BUG_ON(i915_active_is_idle(ref)); | |
64d6c500 CW |
198 | |
199 | parent = NULL; | |
200 | p = &ref->tree.rb_node; | |
201 | while (*p) { | |
202 | parent = *p; | |
203 | ||
204 | node = rb_entry(parent, struct active_node, node); | |
12c255b5 CW |
205 | if (node->timeline == idx) { |
206 | kmem_cache_free(global.slab_cache, prealloc); | |
207 | goto out; | |
208 | } | |
64d6c500 CW |
209 | |
210 | if (node->timeline < idx) | |
211 | p = &parent->rb_right; | |
212 | else | |
213 | p = &parent->rb_left; | |
214 | } | |
215 | ||
12c255b5 | 216 | node = prealloc; |
25ffd4b1 | 217 | i915_active_request_init(&node->base, &tl->mutex, NULL, node_retire); |
64d6c500 CW |
218 | node->ref = ref; |
219 | node->timeline = idx; | |
220 | ||
221 | rb_link_node(&node->node, parent, p); | |
222 | rb_insert_color(&node->node, &ref->tree); | |
223 | ||
64d6c500 | 224 | out: |
12c255b5 CW |
225 | ref->cache = node; |
226 | mutex_unlock(&ref->mutex); | |
227 | ||
d8af05ff | 228 | BUILD_BUG_ON(offsetof(typeof(*node), base)); |
12c255b5 | 229 | return &node->base; |
64d6c500 CW |
230 | } |
231 | ||
12c255b5 CW |
232 | void __i915_active_init(struct drm_i915_private *i915, |
233 | struct i915_active *ref, | |
234 | int (*active)(struct i915_active *ref), | |
235 | void (*retire)(struct i915_active *ref), | |
236 | struct lock_class_key *key) | |
64d6c500 | 237 | { |
5361db1a CW |
238 | debug_active_init(ref); |
239 | ||
64d6c500 | 240 | ref->i915 = i915; |
79c7a28e | 241 | ref->flags = 0; |
12c255b5 | 242 | ref->active = active; |
64d6c500 CW |
243 | ref->retire = retire; |
244 | ref->tree = RB_ROOT; | |
12c255b5 | 245 | ref->cache = NULL; |
d8af05ff | 246 | init_llist_head(&ref->preallocated_barriers); |
12c255b5 CW |
247 | atomic_set(&ref->count, 0); |
248 | __mutex_init(&ref->mutex, "i915_active", key); | |
64d6c500 CW |
249 | } |
250 | ||
f130b712 CW |
251 | static bool ____active_del_barrier(struct i915_active *ref, |
252 | struct active_node *node, | |
253 | struct intel_engine_cs *engine) | |
254 | ||
d8af05ff | 255 | { |
d8af05ff CW |
256 | struct llist_node *head = NULL, *tail = NULL; |
257 | struct llist_node *pos, *next; | |
258 | ||
75d0a7f3 | 259 | GEM_BUG_ON(node->timeline != engine->kernel_context->timeline->fence_context); |
d8af05ff CW |
260 | |
261 | /* | |
262 | * Rebuild the llist excluding our node. We may perform this | |
263 | * outside of the kernel_context timeline mutex and so someone | |
264 | * else may be manipulating the engine->barrier_tasks, in | |
265 | * which case either we or they will be upset :) | |
266 | * | |
267 | * A second __active_del_barrier() will report failure to claim | |
268 | * the active_node and the caller will just shrug and know not to | |
269 | * claim ownership of its node. | |
270 | * | |
271 | * A concurrent i915_request_add_active_barriers() will miss adding | |
272 | * any of the tasks, but we will try again on the next pass -- and since
273 | * we are actively using the barrier, we know that there will be | |
274 | * at least another opportunity when we idle. | |
275 | */ | |
276 | llist_for_each_safe(pos, next, llist_del_all(&engine->barrier_tasks)) { | |
277 | if (node == barrier_from_ll(pos)) { | |
278 | node = NULL; | |
279 | continue; | |
280 | } | |
281 | ||
282 | pos->next = head; | |
283 | head = pos; | |
284 | if (!tail) | |
285 | tail = pos; | |
286 | } | |
287 | if (head) | |
288 | llist_add_batch(head, tail, &engine->barrier_tasks); | |
289 | ||
290 | return !node; | |
291 | } | |
292 | ||
f130b712 CW |
293 | static bool |
294 | __active_del_barrier(struct i915_active *ref, struct active_node *node) | |
295 | { | |
296 | return ____active_del_barrier(ref, node, barrier_to_engine(node)); | |
297 | } | |
298 | ||
64d6c500 | 299 | int i915_active_ref(struct i915_active *ref, |
25ffd4b1 | 300 | struct intel_timeline *tl, |
64d6c500 CW |
301 | struct i915_request *rq) |
302 | { | |
21950ee7 | 303 | struct i915_active_request *active; |
12c255b5 | 304 | int err; |
312c4ba1 | 305 | |
25ffd4b1 CW |
306 | lockdep_assert_held(&tl->mutex); |
307 | ||
312c4ba1 | 308 | /* Prevent reaping in case we malloc/wait while building the tree */ |
12c255b5 CW |
309 | err = i915_active_acquire(ref); |
310 | if (err) | |
311 | return err; | |
64d6c500 | 312 | |
25ffd4b1 | 313 | active = active_instance(ref, tl); |
12c255b5 CW |
314 | if (!active) { |
315 | err = -ENOMEM; | |
312c4ba1 CW |
316 | goto out; |
317 | } | |
64d6c500 | 318 | |
d8af05ff CW |
319 | if (is_barrier(active)) { /* proto-node used by our idle barrier */ |
320 | /* | |
321 | * This request is on the kernel_context timeline, and so | |
322 | * we can use it to substitute for the pending idle-barrier
323 | * request that we want to emit on the kernel_context. | |
324 | */ | |
325 | __active_del_barrier(ref, node_from_active(active)); | |
326 | RCU_INIT_POINTER(active->request, NULL); | |
327 | INIT_LIST_HEAD(&active->link); | |
328 | } else { | |
329 | if (!i915_active_request_isset(active)) | |
330 | atomic_inc(&ref->count); | |
331 | } | |
332 | GEM_BUG_ON(!atomic_read(&ref->count)); | |
21950ee7 | 333 | __i915_active_request_set(active, rq); |
64d6c500 | 334 | |
312c4ba1 CW |
335 | out: |
336 | i915_active_release(ref); | |
337 | return err; | |
64d6c500 CW |
338 | } |
339 | ||
12c255b5 | 340 | int i915_active_acquire(struct i915_active *ref) |
64d6c500 | 341 | { |
12c255b5 CW |
342 | int err; |
343 | ||
5361db1a | 344 | debug_active_assert(ref); |
12c255b5 CW |
345 | if (atomic_add_unless(&ref->count, 1, 0)) |
346 | return 0; | |
5361db1a | 347 | |
12c255b5 CW |
348 | err = mutex_lock_interruptible(&ref->mutex); |
349 | if (err) | |
350 | return err; | |
5361db1a | 351 | |
12c255b5 CW |
352 | if (!atomic_read(&ref->count) && ref->active) |
353 | err = ref->active(ref); | |
354 | if (!err) { | |
355 | debug_active_activate(ref); | |
356 | atomic_inc(&ref->count); | |
357 | } | |
358 | ||
359 | mutex_unlock(&ref->mutex); | |
360 | ||
361 | return err; | |
64d6c500 CW |
362 | } |
363 | ||
364 | void i915_active_release(struct i915_active *ref) | |
365 | { | |
5361db1a | 366 | debug_active_assert(ref); |
12c255b5 | 367 | active_retire(ref); |
64d6c500 CW |
368 | } |
369 | ||
79c7a28e CW |
370 | static void __active_ungrab(struct i915_active *ref) |
371 | { | |
372 | clear_and_wake_up_bit(I915_ACTIVE_GRAB_BIT, &ref->flags); | |
373 | } | |
374 | ||
375 | bool i915_active_trygrab(struct i915_active *ref) | |
376 | { | |
377 | debug_active_assert(ref); | |
378 | ||
379 | if (test_and_set_bit(I915_ACTIVE_GRAB_BIT, &ref->flags)) | |
380 | return false; | |
381 | ||
382 | if (!atomic_add_unless(&ref->count, 1, 0)) { | |
383 | __active_ungrab(ref); | |
384 | return false; | |
385 | } | |
386 | ||
387 | return true; | |
388 | } | |
389 | ||
390 | void i915_active_ungrab(struct i915_active *ref) | |
391 | { | |
392 | GEM_BUG_ON(!test_bit(I915_ACTIVE_GRAB_BIT, &ref->flags)); | |
393 | ||
394 | active_retire(ref); | |
395 | __active_ungrab(ref); | |
396 | } | |
397 | ||
64d6c500 CW |
398 | int i915_active_wait(struct i915_active *ref) |
399 | { | |
400 | struct active_node *it, *n; | |
12c255b5 | 401 | int err; |
64d6c500 | 402 | |
12c255b5 | 403 | might_sleep(); |
d650d1f5 CW |
404 | might_lock(&ref->mutex); |
405 | ||
79c7a28e | 406 | if (i915_active_is_idle(ref)) |
12c255b5 | 407 | return 0; |
64d6c500 | 408 | |
12c255b5 CW |
409 | err = mutex_lock_interruptible(&ref->mutex); |
410 | if (err) | |
411 | return err; | |
412 | ||
413 | if (!atomic_add_unless(&ref->count, 1, 0)) { | |
414 | mutex_unlock(&ref->mutex); | |
415 | return 0; | |
416 | } | |
64d6c500 CW |
417 | |
418 | rbtree_postorder_for_each_entry_safe(it, n, &ref->tree, node) { | |
d8af05ff CW |
419 | if (is_barrier(&it->base)) { /* unconnected idle-barrier */ |
420 | err = -EBUSY; | |
421 | break; | |
422 | } | |
423 | ||
12c255b5 CW |
424 | err = i915_active_request_retire(&it->base, BKL(ref)); |
425 | if (err) | |
64d6c500 CW |
426 | break; |
427 | } | |
428 | ||
12c255b5 | 429 | __active_retire(ref); |
afd1bcd4 CW |
430 | if (err) |
431 | return err; | |
432 | ||
79c7a28e CW |
433 | if (wait_on_bit(&ref->flags, I915_ACTIVE_GRAB_BIT, TASK_KILLABLE)) |
434 | return -EINTR; | |
435 | ||
afd1bcd4 CW |
436 | if (!i915_active_is_idle(ref)) |
437 | return -EBUSY; | |
438 | ||
439 | return 0; | |
64d6c500 CW |
440 | } |
441 | ||
21950ee7 CW |
442 | int i915_request_await_active_request(struct i915_request *rq, |
443 | struct i915_active_request *active) | |
64d6c500 CW |
444 | { |
445 | struct i915_request *barrier = | |
21950ee7 | 446 | i915_active_request_raw(active, &rq->i915->drm.struct_mutex); |
64d6c500 CW |
447 | |
448 | return barrier ? i915_request_await_dma_fence(rq, &barrier->fence) : 0; | |
449 | } | |
450 | ||
451 | int i915_request_await_active(struct i915_request *rq, struct i915_active *ref) | |
452 | { | |
453 | struct active_node *it, *n; | |
12c255b5 | 454 | int err; |
64d6c500 | 455 | |
12c255b5 CW |
456 | if (RB_EMPTY_ROOT(&ref->tree)) |
457 | return 0; | |
312c4ba1 | 458 | |
12c255b5 CW |
459 | /* await allocates and so we need to avoid hitting the shrinker */ |
460 | err = i915_active_acquire(ref); | |
312c4ba1 | 461 | if (err) |
12c255b5 | 462 | return err; |
64d6c500 | 463 | |
12c255b5 | 464 | mutex_lock(&ref->mutex); |
64d6c500 | 465 | rbtree_postorder_for_each_entry_safe(it, n, &ref->tree, node) { |
312c4ba1 CW |
466 | err = i915_request_await_active_request(rq, &it->base); |
467 | if (err) | |
12c255b5 | 468 | break; |
64d6c500 | 469 | } |
12c255b5 | 470 | mutex_unlock(&ref->mutex); |
64d6c500 | 471 | |
312c4ba1 CW |
472 | i915_active_release(ref); |
473 | return err; | |
64d6c500 CW |
474 | } |
475 | ||
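A hypothetical consumer of the two interfaces above (a sketch, not lifted from the driver): when reusing an object from a new request, the caller first orders the new request behind everything the i915_active currently tracks and then records the new request as the latest activity. tl->mutex is assumed to be held by the caller, as i915_active_ref() asserts.

static int sketch_reuse_object(struct i915_active *ref,
                               struct intel_timeline *tl,
                               struct i915_request *rq)
{
        int err;

        /* queue rq behind every request currently tracked by ref */
        err = i915_request_await_active(rq, ref);
        if (err)
                return err;

        /* then record rq as the most recent activity on this timeline */
        return i915_active_ref(ref, tl, rq);
}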
a42375af | 476 | #if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM) |
64d6c500 CW |
477 | void i915_active_fini(struct i915_active *ref) |
478 | { | |
5361db1a | 479 | debug_active_fini(ref); |
a42375af | 480 | GEM_BUG_ON(!RB_EMPTY_ROOT(&ref->tree)); |
12c255b5 CW |
481 | GEM_BUG_ON(atomic_read(&ref->count)); |
482 | mutex_destroy(&ref->mutex); | |
64d6c500 | 483 | } |
a42375af | 484 | #endif |
64d6c500 | 485 | |
d8af05ff CW |
486 | static inline bool is_idle_barrier(struct active_node *node, u64 idx) |
487 | { | |
488 | return node->timeline == idx && !i915_active_request_isset(&node->base); | |
489 | } | |
490 | ||
491 | static struct active_node *reuse_idle_barrier(struct i915_active *ref, u64 idx) | |
492 | { | |
493 | struct rb_node *prev, *p; | |
494 | ||
495 | if (RB_EMPTY_ROOT(&ref->tree)) | |
496 | return NULL; | |
497 | ||
498 | mutex_lock(&ref->mutex); | |
499 | GEM_BUG_ON(i915_active_is_idle(ref)); | |
500 | ||
501 | /* | |
502 | * Try to reuse any existing barrier nodes already allocated for this | |
503 | * i915_active, due to overlapping active phases there is likely a | |
504 | * node kept alive (as we reuse before parking). We prefer to reuse | |
505 | * completely idle barriers (less hassle in manipulating the llists), | |
506 | * but otherwise any will do. | |
507 | */ | |
508 | if (ref->cache && is_idle_barrier(ref->cache, idx)) { | |
509 | p = &ref->cache->node; | |
510 | goto match; | |
511 | } | |
512 | ||
513 | prev = NULL; | |
514 | p = ref->tree.rb_node; | |
515 | while (p) { | |
516 | struct active_node *node = | |
517 | rb_entry(p, struct active_node, node); | |
518 | ||
519 | if (is_idle_barrier(node, idx)) | |
520 | goto match; | |
521 | ||
522 | prev = p; | |
523 | if (node->timeline < idx) | |
524 | p = p->rb_right; | |
525 | else | |
526 | p = p->rb_left; | |
527 | } | |
528 | ||
529 | /* | |
530 | * No quick match, but we did find the leftmost rb_node for the | |
531 | * kernel_context. Walk the rb_tree in-order to see if there were | |
532 | * any idle-barriers on this timeline that we missed, or just use | |
533 | * the first pending barrier. | |
534 | */ | |
535 | for (p = prev; p; p = rb_next(p)) { | |
536 | struct active_node *node = | |
537 | rb_entry(p, struct active_node, node); | |
f130b712 | 538 | struct intel_engine_cs *engine; |
d8af05ff CW |
539 | |
540 | if (node->timeline > idx) | |
541 | break; | |
542 | ||
543 | if (node->timeline < idx) | |
544 | continue; | |
545 | ||
546 | if (is_idle_barrier(node, idx)) | |
547 | goto match; | |
548 | ||
549 | /* | |
550 | * The list of pending barriers is protected by the | |
551 | * kernel_context timeline, which notably we do not hold | |
552 | * here. i915_request_add_active_barriers() may consume | |
553 | * the barrier before we claim it, so we have to check | |
554 | * for success. | |
555 | */ | |
f130b712 CW |
556 | engine = __barrier_to_engine(node); |
557 | smp_rmb(); /* serialise with add_active_barriers */ | |
558 | if (is_barrier(&node->base) && | |
559 | ____active_del_barrier(ref, node, engine)) | |
d8af05ff CW |
560 | goto match; |
561 | } | |
562 | ||
563 | mutex_unlock(&ref->mutex); | |
564 | ||
565 | return NULL; | |
566 | ||
567 | match: | |
568 | rb_erase(p, &ref->tree); /* Hide from waits and sibling allocations */ | |
569 | if (p == &ref->cache->node) | |
570 | ref->cache = NULL; | |
571 | mutex_unlock(&ref->mutex); | |
572 | ||
573 | return rb_entry(p, struct active_node, node); | |
574 | } | |
575 | ||
ce476c80 CW |
576 | int i915_active_acquire_preallocate_barrier(struct i915_active *ref, |
577 | struct intel_engine_cs *engine) | |
578 | { | |
579 | struct drm_i915_private *i915 = engine->i915; | |
3f99a614 | 580 | intel_engine_mask_t tmp, mask = engine->mask; |
7009db14 | 581 | struct llist_node *pos, *next; |
7009db14 | 582 | int err; |
ce476c80 | 583 | |
d8af05ff CW |
584 | GEM_BUG_ON(!llist_empty(&ref->preallocated_barriers)); |
585 | ||
586 | /* | |
587 | * Preallocate a node for each physical engine supporting the target | |
588 | * engine (remember virtual engines have more than one sibling). | |
589 | * We can then use the preallocated nodes in | |
590 | * i915_active_acquire_barrier() | |
591 | */ | |
3f99a614 | 592 | for_each_engine_masked(engine, i915, mask, tmp) { |
75d0a7f3 | 593 | u64 idx = engine->kernel_context->timeline->fence_context; |
ce476c80 CW |
594 | struct active_node *node; |
595 | ||
d8af05ff CW |
596 | node = reuse_idle_barrier(ref, idx); |
597 | if (!node) { | |
598 | node = kmem_cache_alloc(global.slab_cache, GFP_KERNEL); | |
599 | if (!node) { | |
600 | err = -ENOMEM;
601 | goto unwind; | |
602 | } | |
603 | ||
25ffd4b1 CW |
604 | #if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM) |
605 | node->base.lock = | |
606 | &engine->kernel_context->timeline->mutex; | |
607 | #endif | |
d8af05ff CW |
608 | RCU_INIT_POINTER(node->base.request, NULL); |
609 | node->base.retire = node_retire; | |
610 | node->timeline = idx; | |
611 | node->ref = ref; | |
ce476c80 CW |
612 | } |
613 | ||
d8af05ff CW |
614 | if (!i915_active_request_isset(&node->base)) { |
615 | /* | |
616 | * Mark this as being *our* unconnected proto-node. | |
617 | * | |
618 | * Since this node is not in any list, and we have | |
619 | * decoupled it from the rbtree, we can reuse the | |
620 | * request to indicate this is an idle-barrier node | |
621 | * and then we can use the rb_node and list pointers | |
622 | * for our tracking of the pending barrier. | |
623 | */ | |
624 | RCU_INIT_POINTER(node->base.request, ERR_PTR(-EAGAIN)); | |
625 | node->base.link.prev = (void *)engine; | |
626 | atomic_inc(&ref->count); | |
627 | } | |
ce476c80 | 628 | |
d8af05ff CW |
629 | GEM_BUG_ON(barrier_to_engine(node) != engine); |
630 | llist_add(barrier_to_ll(node), &ref->preallocated_barriers); | |
7009db14 | 631 | intel_engine_pm_get(engine); |
ce476c80 CW |
632 | } |
633 | ||
7009db14 CW |
634 | return 0; |
635 | ||
636 | unwind: | |
d8af05ff CW |
637 | llist_for_each_safe(pos, next, take_preallocated_barriers(ref)) { |
638 | struct active_node *node = barrier_from_ll(pos); | |
7009db14 | 639 | |
d8af05ff CW |
640 | atomic_dec(&ref->count); |
641 | intel_engine_pm_put(barrier_to_engine(node)); | |
7009db14 | 642 | |
7009db14 CW |
643 | kmem_cache_free(global.slab_cache, node); |
644 | } | |
ce476c80 CW |
645 | return err; |
646 | } | |
647 | ||
648 | void i915_active_acquire_barrier(struct i915_active *ref) | |
649 | { | |
650 | struct llist_node *pos, *next; | |
651 | ||
12c255b5 | 652 | GEM_BUG_ON(i915_active_is_idle(ref)); |
ce476c80 | 653 | |
d8af05ff CW |
654 | /* |
655 | * Transfer the list of preallocated barriers into the | |
656 | * i915_active rbtree, but only as proto-nodes. They will be | |
657 | * populated by i915_request_add_active_barriers() to point to the | |
658 | * request that will eventually release them. | |
659 | */ | |
12c255b5 | 660 | mutex_lock_nested(&ref->mutex, SINGLE_DEPTH_NESTING); |
d8af05ff CW |
661 | llist_for_each_safe(pos, next, take_preallocated_barriers(ref)) { |
662 | struct active_node *node = barrier_from_ll(pos); | |
663 | struct intel_engine_cs *engine = barrier_to_engine(node); | |
ce476c80 CW |
664 | struct rb_node **p, *parent; |
665 | ||
ce476c80 CW |
666 | parent = NULL; |
667 | p = &ref->tree.rb_node; | |
668 | while (*p) { | |
d8af05ff CW |
669 | struct active_node *it; |
670 | ||
ce476c80 | 671 | parent = *p; |
d8af05ff CW |
672 | |
673 | it = rb_entry(parent, struct active_node, node); | |
674 | if (it->timeline < node->timeline) | |
ce476c80 CW |
675 | p = &parent->rb_right; |
676 | else | |
677 | p = &parent->rb_left; | |
678 | } | |
679 | rb_link_node(&node->node, parent, p); | |
680 | rb_insert_color(&node->node, &ref->tree); | |
681 | ||
d8af05ff | 682 | llist_add(barrier_to_ll(node), &engine->barrier_tasks); |
7009db14 | 683 | intel_engine_pm_put(engine); |
ce476c80 | 684 | } |
12c255b5 | 685 | mutex_unlock(&ref->mutex); |
ce476c80 CW |
686 | } |
687 | ||
d8af05ff | 688 | void i915_request_add_active_barriers(struct i915_request *rq) |
ce476c80 CW |
689 | { |
690 | struct intel_engine_cs *engine = rq->engine; | |
691 | struct llist_node *node, *next; | |
692 | ||
d8af05ff | 693 | GEM_BUG_ON(intel_engine_is_virtual(engine)); |
75d0a7f3 | 694 | GEM_BUG_ON(rq->timeline != engine->kernel_context->timeline); |
d8af05ff CW |
695 | |
696 | /* | |
697 | * Attach the list of proto-fences to the in-flight request such | |
698 | * that the parent i915_active will be released when this request | |
699 | * is retired. | |
700 | */ | |
701 | llist_for_each_safe(node, next, llist_del_all(&engine->barrier_tasks)) { | |
702 | RCU_INIT_POINTER(barrier_from_ll(node)->base.request, rq); | |
f130b712 | 703 | smp_wmb(); /* serialise with reuse_idle_barrier */ |
ce476c80 | 704 | list_add_tail((struct list_head *)node, &rq->active_list); |
d8af05ff | 705 | } |
ce476c80 CW |
706 | } |
707 | ||
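Taken together, the barrier interface spans three calls. The sequence below is a sketch of how a caller (for instance the engine parking path) might be expected to drive it; it is illustrative only, and kernel_rq is assumed to be a request on engine->kernel_context->timeline, as i915_request_add_active_barriers() requires.

static int sketch_install_idle_barrier(struct i915_active *ref,
                                       struct intel_engine_cs *engine,
                                       struct i915_request *kernel_rq)
{
        int err;

        /* 1: allocate (or reuse) one proto-node per physical engine */
        err = i915_active_acquire_preallocate_barrier(ref, engine);
        if (err)
                return err;

        /* 2: publish the proto-nodes into ref->tree and engine->barrier_tasks */
        i915_active_acquire_barrier(ref);

        /*
         * 3: attach the pending barriers to a request on the engine's
         * kernel_context so that retiring that request drops the references
         * taken in step 1.
         */
        i915_request_add_active_barriers(kernel_rq);

        return 0;
}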
21950ee7 CW |
708 | int i915_active_request_set(struct i915_active_request *active, |
709 | struct i915_request *rq) | |
710 | { | |
711 | int err; | |
712 | ||
25ffd4b1 CW |
713 | #if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM) |
714 | lockdep_assert_held(active->lock); | |
715 | #endif | |
716 | ||
21950ee7 CW |
717 | /* Must maintain ordering wrt previous active requests */ |
718 | err = i915_request_await_active_request(rq, active); | |
719 | if (err) | |
720 | return err; | |
721 | ||
722 | __i915_active_request_set(active, rq); | |
723 | return 0; | |
724 | } | |
725 | ||
726 | void i915_active_retire_noop(struct i915_active_request *active, | |
727 | struct i915_request *request) | |
728 | { | |
729 | /* Space left intentionally blank */ | |
730 | } | |
731 | ||
64d6c500 CW |
732 | #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) |
733 | #include "selftests/i915_active.c" | |
734 | #endif | |
5f5c139d | 735 | |
103b76ee | 736 | static void i915_global_active_shrink(void) |
5f5c139d | 737 | { |
103b76ee | 738 | kmem_cache_shrink(global.slab_cache); |
5f5c139d CW |
739 | } |
740 | ||
103b76ee | 741 | static void i915_global_active_exit(void) |
32eb6bcf | 742 | { |
103b76ee | 743 | kmem_cache_destroy(global.slab_cache); |
32eb6bcf CW |
744 | } |
745 | ||
103b76ee CW |
746 | static struct i915_global_active global = { { |
747 | .shrink = i915_global_active_shrink, | |
748 | .exit = i915_global_active_exit, | |
749 | } }; | |
750 | ||
751 | int __init i915_global_active_init(void) | |
5f5c139d | 752 | { |
103b76ee CW |
753 | global.slab_cache = KMEM_CACHE(active_node, SLAB_HWCACHE_ALIGN); |
754 | if (!global.slab_cache) | |
755 | return -ENOMEM; | |
756 | ||
757 | i915_global_register(&global.base); | |
758 | return 0; | |
5f5c139d | 759 | } |