/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright © 2019 Intel Corporation
 */

#include <linux/debugobjects.h>

#include "gt/intel_engine_pm.h"

#include "i915_drv.h"
#include "i915_active.h"
#include "i915_globals.h"

#define BKL(ref) (&(ref)->i915->drm.struct_mutex)

/*
 * Active refs memory management
 *
 * To be more economical with memory, we reap all the i915_active trees as
 * they idle (when we know the active requests are inactive) and allocate the
 * nodes from a local slab cache to hopefully reduce the fragmentation.
 */
static struct i915_global_active {
	struct i915_global base;
	struct kmem_cache *slab_cache;
} global;

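/*
 * An active_node tracks the most recent request on one timeline; the nodes
 * of an i915_active are kept in an rbtree keyed by the timeline's fence
 * context id.
 */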
struct active_node {
	struct i915_active_request base;
	struct i915_active *ref;
	struct rb_node node;
	u64 timeline;
};

#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM) && IS_ENABLED(CONFIG_DEBUG_OBJECTS)

static void *active_debug_hint(void *addr)
{
	struct i915_active *ref = addr;

	return (void *)ref->active ?: (void *)ref->retire ?: (void *)ref;
}

static struct debug_obj_descr active_debug_desc = {
	.name = "i915_active",
	.debug_hint = active_debug_hint,
};

static void debug_active_init(struct i915_active *ref)
{
	debug_object_init(ref, &active_debug_desc);
}

static void debug_active_activate(struct i915_active *ref)
{
	debug_object_activate(ref, &active_debug_desc);
}

static void debug_active_deactivate(struct i915_active *ref)
{
	debug_object_deactivate(ref, &active_debug_desc);
}

static void debug_active_fini(struct i915_active *ref)
{
	debug_object_free(ref, &active_debug_desc);
}

static void debug_active_assert(struct i915_active *ref)
{
	debug_object_assert_init(ref, &active_debug_desc);
}

#else

static inline void debug_active_init(struct i915_active *ref) { }
static inline void debug_active_activate(struct i915_active *ref) { }
static inline void debug_active_deactivate(struct i915_active *ref) { }
static inline void debug_active_fini(struct i915_active *ref) { }
static inline void debug_active_assert(struct i915_active *ref) { }

#endif

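/*
 * Drop one reference with ref->mutex held. If it was the last, take
 * ownership of the tree, call ref->retire() and return all nodes to the
 * slab cache. The mutex is always released before returning.
 */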
static void
__active_retire(struct i915_active *ref)
{
	struct active_node *it, *n;
	struct rb_root root;
	bool retire = false;

	lockdep_assert_held(&ref->mutex);

	/* return the unused nodes to our slabcache -- flushing the allocator */
	if (atomic_dec_and_test(&ref->count)) {
		debug_active_deactivate(ref);
		root = ref->tree;
		ref->tree = RB_ROOT;
		ref->cache = NULL;
		retire = true;
	}

	mutex_unlock(&ref->mutex);
	if (!retire)
		return;

	ref->retire(ref);

	rbtree_postorder_for_each_entry_safe(it, n, &root, node) {
		GEM_BUG_ON(i915_active_request_isset(&it->base));
		kmem_cache_free(global.slab_cache, it);
	}
}

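/*
 * Unlocked fast path: atomic_add_unless() drops the reference without the
 * mutex whenever it is not the last one.
 */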
static void
active_retire(struct i915_active *ref)
{
	GEM_BUG_ON(!atomic_read(&ref->count));
	if (atomic_add_unless(&ref->count, -1, 1))
		return;

	/* One active may be flushed from inside the acquire of another */
	mutex_lock_nested(&ref->mutex, SINGLE_DEPTH_NESTING);
	__active_retire(ref);
}

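/* Retire callback for a node's request slot: drop that node's reference. */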
static void
node_retire(struct i915_active_request *base, struct i915_request *rq)
{
	active_retire(container_of(base, struct active_node, base)->ref);
}

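/*
 * Look up (or insert) the node for the given timeline, preallocating a
 * replacement node before taking ref->mutex so that nothing is allocated
 * while the lock is held.
 */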
static struct i915_active_request *
active_instance(struct i915_active *ref, u64 idx)
{
	struct active_node *node, *prealloc;
	struct rb_node **p, *parent;

	/*
	 * We track the most recently used timeline to skip a rbtree search
	 * for the common case, under typical loads we never need the rbtree
	 * at all. We can reuse the last slot if it is empty, that is
	 * after the previous activity has been retired, or if it matches the
	 * current timeline.
	 */
	node = READ_ONCE(ref->cache);
	if (node && node->timeline == idx)
		return &node->base;

	/* Preallocate a replacement, just in case */
	prealloc = kmem_cache_alloc(global.slab_cache, GFP_KERNEL);
	if (!prealloc)
		return NULL;

	mutex_lock(&ref->mutex);
	GEM_BUG_ON(i915_active_is_idle(ref));

	parent = NULL;
	p = &ref->tree.rb_node;
	while (*p) {
		parent = *p;

		node = rb_entry(parent, struct active_node, node);
		if (node->timeline == idx) {
			kmem_cache_free(global.slab_cache, prealloc);
			goto out;
		}

		if (node->timeline < idx)
			p = &parent->rb_right;
		else
			p = &parent->rb_left;
	}

	node = prealloc;
	i915_active_request_init(&node->base, NULL, node_retire);
	node->ref = ref;
	node->timeline = idx;

	rb_link_node(&node->node, parent, p);
	rb_insert_color(&node->node, &ref->tree);

out:
	ref->cache = node;
	mutex_unlock(&ref->mutex);

	return &node->base;
}

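/*
 * Set up an i915_active: @active is called on the first acquire, @retire
 * once the last reference is dropped. Typical usage (a sketch; callers
 * normally go through the i915_active_init() wrapper, which supplies @key):
 *
 *	i915_active_init(i915, &obj->active, obj_active, obj_retire);
 *	err = i915_active_ref(&obj->active, tl->fence_context, rq);
 *	...
 *	err = i915_active_wait(&obj->active);
 *	i915_active_fini(&obj->active);
 */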
void __i915_active_init(struct drm_i915_private *i915,
			struct i915_active *ref,
			int (*active)(struct i915_active *ref),
			void (*retire)(struct i915_active *ref),
			struct lock_class_key *key)
{
	debug_active_init(ref);

	ref->i915 = i915;
	ref->active = active;
	ref->retire = retire;
	ref->tree = RB_ROOT;
	ref->cache = NULL;
	init_llist_head(&ref->barriers);
	atomic_set(&ref->count, 0);
	__mutex_init(&ref->mutex, "i915_active", key);
}

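/*
 * Track @rq as the most recent activity on @timeline; the i915_active only
 * gains a reference when the timeline slot transitions from idle to busy.
 */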
int i915_active_ref(struct i915_active *ref,
		    u64 timeline,
		    struct i915_request *rq)
{
	struct i915_active_request *active;
	int err;

	/* Prevent reaping in case we malloc/wait while building the tree */
	err = i915_active_acquire(ref);
	if (err)
		return err;

	active = active_instance(ref, timeline);
	if (!active) {
		err = -ENOMEM;
		goto out;
	}

	if (!i915_active_request_isset(active))
		atomic_inc(&ref->count);
	__i915_active_request_set(active, rq);

out:
	i915_active_release(ref);
	return err;
}

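/*
 * Take a reference, invoking ref->active() on the transition from idle.
 * The lockless fast path can only succeed while other references exist.
 */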
int i915_active_acquire(struct i915_active *ref)
{
	int err;

	debug_active_assert(ref);
	if (atomic_add_unless(&ref->count, 1, 0))
		return 0;

	err = mutex_lock_interruptible(&ref->mutex);
	if (err)
		return err;

	if (!atomic_read(&ref->count) && ref->active)
		err = ref->active(ref);
	if (!err) {
		debug_active_activate(ref);
		atomic_inc(&ref->count);
	}

	mutex_unlock(&ref->mutex);

	return err;
}

void i915_active_release(struct i915_active *ref)
{
	debug_active_assert(ref);
	active_retire(ref);
}

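/*
 * Flush and wait for all requests tracked by @ref to retire. Returns
 * -EBUSY if new activity kept @ref from becoming idle.
 */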
int i915_active_wait(struct i915_active *ref)
{
	struct active_node *it, *n;
	int err;

	might_sleep();
	if (RB_EMPTY_ROOT(&ref->tree))
		return 0;

	err = mutex_lock_interruptible(&ref->mutex);
	if (err)
		return err;

	if (!atomic_add_unless(&ref->count, 1, 0)) {
		mutex_unlock(&ref->mutex);
		return 0;
	}

	rbtree_postorder_for_each_entry_safe(it, n, &ref->tree, node) {
		err = i915_active_request_retire(&it->base, BKL(ref));
		if (err)
			break;
	}

	__active_retire(ref);
	if (err)
		return err;

	if (!i915_active_is_idle(ref))
		return -EBUSY;

	return 0;
}

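/* Make @rq wait for the request, if any, held in the @active slot. */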
int i915_request_await_active_request(struct i915_request *rq,
				      struct i915_active_request *active)
{
	struct i915_request *barrier =
		i915_active_request_raw(active, &rq->i915->drm.struct_mutex);

	return barrier ? i915_request_await_dma_fence(rq, &barrier->fence) : 0;
}

int i915_request_await_active(struct i915_request *rq, struct i915_active *ref)
{
	struct active_node *it, *n;
	int err;

	if (RB_EMPTY_ROOT(&ref->tree))
		return 0;

	/* await allocates and so we need to avoid hitting the shrinker */
	err = i915_active_acquire(ref);
	if (err)
		return err;

	mutex_lock(&ref->mutex);
	rbtree_postorder_for_each_entry_safe(it, n, &ref->tree, node) {
		err = i915_request_await_active_request(rq, &it->base);
		if (err)
			break;
	}
	mutex_unlock(&ref->mutex);

	i915_active_release(ref);
	return err;
}

#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)
void i915_active_fini(struct i915_active *ref)
{
	debug_active_fini(ref);
	GEM_BUG_ON(!RB_EMPTY_ROOT(&ref->tree));
	GEM_BUG_ON(atomic_read(&ref->count));
	mutex_destroy(&ref->mutex);
}
#endif

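/*
 * Preallocate one barrier node for each engine in @engine's mask, keyed to
 * the engine's kernel context timeline, so that the later
 * i915_active_acquire_barrier() cannot fail. Each node pins its engine
 * with an engine-pm wakeref until the barrier is passed on.
 */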
int i915_active_acquire_preallocate_barrier(struct i915_active *ref,
					    struct intel_engine_cs *engine)
{
	struct drm_i915_private *i915 = engine->i915;
	struct llist_node *pos, *next;
	unsigned long tmp;
	int err;

	GEM_BUG_ON(!engine->mask);
	for_each_engine_masked(engine, i915, engine->mask, tmp) {
		struct intel_context *kctx = engine->kernel_context;
		struct active_node *node;

		node = kmem_cache_alloc(global.slab_cache, GFP_KERNEL);
		if (unlikely(!node)) {
			err = -ENOMEM;
			goto unwind;
		}

		i915_active_request_init(&node->base,
					 (void *)engine, node_retire);
		node->timeline = kctx->ring->timeline->fence_context;
		node->ref = ref;
		atomic_inc(&ref->count);

		intel_engine_pm_get(engine);
		llist_add((struct llist_node *)&node->base.link,
			  &ref->barriers);
	}

	return 0;

unwind:
	llist_for_each_safe(pos, next, llist_del_all(&ref->barriers)) {
		struct active_node *node;

		node = container_of((struct list_head *)pos,
				    typeof(*node), base.link);
		engine = (void *)rcu_access_pointer(node->base.request);

		intel_engine_pm_put(engine);
		kmem_cache_free(global.slab_cache, node);
	}
	return err;
}

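/*
 * Insert the preallocated barrier nodes into ref->tree and hand each one
 * to its engine's barrier_tasks list; the request slot holds
 * ERR_PTR(-EAGAIN) as a placeholder until a request adopts the barrier
 * via i915_request_add_barriers().
 */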
void i915_active_acquire_barrier(struct i915_active *ref)
{
	struct llist_node *pos, *next;

	GEM_BUG_ON(i915_active_is_idle(ref));

	mutex_lock_nested(&ref->mutex, SINGLE_DEPTH_NESTING);
	llist_for_each_safe(pos, next, llist_del_all(&ref->barriers)) {
		struct intel_engine_cs *engine;
		struct active_node *node;
		struct rb_node **p, *parent;

		node = container_of((struct list_head *)pos,
				    typeof(*node), base.link);

		engine = (void *)rcu_access_pointer(node->base.request);
		RCU_INIT_POINTER(node->base.request, ERR_PTR(-EAGAIN));

		parent = NULL;
		p = &ref->tree.rb_node;
		while (*p) {
			parent = *p;
			if (rb_entry(parent,
				     struct active_node,
				     node)->timeline < node->timeline)
				p = &parent->rb_right;
			else
				p = &parent->rb_left;
		}
		rb_link_node(&node->node, parent, p);
		rb_insert_color(&node->node, &ref->tree);

		llist_add((struct llist_node *)&node->base.link,
			  &engine->barrier_tasks);
		intel_engine_pm_put(engine);
	}
	mutex_unlock(&ref->mutex);
}

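/*
 * Move the engine's pending barrier tasks onto @rq's active list so they
 * are retired, dropping their references, along with @rq.
 */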
void i915_request_add_barriers(struct i915_request *rq)
{
	struct intel_engine_cs *engine = rq->engine;
	struct llist_node *node, *next;

	llist_for_each_safe(node, next, llist_del_all(&engine->barrier_tasks))
		list_add_tail((struct list_head *)node, &rq->active_list);
}

int i915_active_request_set(struct i915_active_request *active,
			    struct i915_request *rq)
{
	int err;

	/* Must maintain ordering wrt previous active requests */
	err = i915_request_await_active_request(rq, active);
	if (err)
		return err;

	__i915_active_request_set(active, rq);
	return 0;
}

void i915_active_retire_noop(struct i915_active_request *active,
			     struct i915_request *request)
{
	/* Space left intentionally blank */
}

#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftests/i915_active.c"
#endif

static void i915_global_active_shrink(void)
{
	kmem_cache_shrink(global.slab_cache);
}

static void i915_global_active_exit(void)
{
	kmem_cache_destroy(global.slab_cache);
}

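/*
 * This initialised definition completes the tentative definition of
 * 'global' near the top of the file, which lets the functions above
 * reference global.slab_cache.
 */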
static struct i915_global_active global = { {
	.shrink = i915_global_active_shrink,
	.exit = i915_global_active_exit,
} };

int __init i915_global_active_init(void)
{
	global.slab_cache = KMEM_CACHE(active_node, SLAB_HWCACHE_ALIGN);
	if (!global.slab_cache)
		return -ENOMEM;

	i915_global_register(&global.base);
	return 0;
}