drm/i915: stop using seqcount for fence pruning
[linux-2.6-block.git] / drivers/gpu/drm/i915/i915_scheduler.c
/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright © 2018 Intel Corporation
 */

#include <linux/mutex.h>

#include "i915_drv.h"
#include "i915_globals.h"
#include "i915_request.h"
#include "i915_scheduler.h"

static struct i915_global_scheduler {
	struct i915_global base;
	struct kmem_cache *slab_dependencies;
	struct kmem_cache *slab_priorities;
} global;

static DEFINE_SPINLOCK(schedule_lock);

static const struct i915_request *
node_to_request(const struct i915_sched_node *node)
{
	return container_of(node, const struct i915_request, sched);
}

static inline bool node_started(const struct i915_sched_node *node)
{
	return i915_request_started(node_to_request(node));
}

static inline bool node_signaled(const struct i915_sched_node *node)
{
	return i915_request_completed(node_to_request(node));
}

static inline struct i915_priolist *to_priolist(struct rb_node *rb)
{
	return rb_entry(rb, struct i915_priolist, node);
}

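/*
 * Debug-only (CONFIG_DRM_I915_DEBUG_GEM) sanity check that the execlists
 * priority rbtree is sorted with the highest priority first and that every
 * non-empty request bucket has its bit set in the priolist's used mask.
 */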
static void assert_priolists(struct intel_engine_execlists * const execlists)
{
	struct rb_node *rb;
	long last_prio, i;

	if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
		return;

	GEM_BUG_ON(rb_first_cached(&execlists->queue) !=
		   rb_first(&execlists->queue.rb_root));

	last_prio = (INT_MAX >> I915_USER_PRIORITY_SHIFT) + 1;
	for (rb = rb_first_cached(&execlists->queue); rb; rb = rb_next(rb)) {
		const struct i915_priolist *p = to_priolist(rb);

		GEM_BUG_ON(p->priority >= last_prio);
		last_prio = p->priority;

		GEM_BUG_ON(!p->used);
		for (i = 0; i < ARRAY_SIZE(p->requests); i++) {
			if (list_empty(&p->requests[i]))
				continue;

			GEM_BUG_ON(!(p->used & BIT(i)));
		}
	}
}

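/*
 * Return the request list for @prio on @engine, inserting a new i915_priolist
 * into the execlists queue rbtree if that priority level is not yet present.
 * The incoming priority is split in two: the user priority (upper bits) keys
 * the rbtree node, while the internal sub-priority (low I915_PRIORITY_MASK
 * bits) selects a fifo bucket within that node. Caller must hold
 * engine->active.lock.
 */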
struct list_head *
i915_sched_lookup_priolist(struct intel_engine_cs *engine, int prio)
{
	struct intel_engine_execlists * const execlists = &engine->execlists;
	struct i915_priolist *p;
	struct rb_node **parent, *rb;
	bool first = true;
	int idx, i;

	lockdep_assert_held(&engine->active.lock);
	assert_priolists(execlists);

	/* buckets sorted from highest [in slot 0] to lowest priority */
	idx = I915_PRIORITY_COUNT - (prio & I915_PRIORITY_MASK) - 1;
	prio >>= I915_USER_PRIORITY_SHIFT;
	if (unlikely(execlists->no_priolist))
		prio = I915_PRIORITY_NORMAL;

find_priolist:
	/* most positive priority is scheduled first, equal priorities fifo */
	rb = NULL;
	parent = &execlists->queue.rb_root.rb_node;
	while (*parent) {
		rb = *parent;
		p = to_priolist(rb);
		if (prio > p->priority) {
			parent = &rb->rb_left;
		} else if (prio < p->priority) {
			parent = &rb->rb_right;
			first = false;
		} else {
			goto out;
		}
	}

	if (prio == I915_PRIORITY_NORMAL) {
		p = &execlists->default_priolist;
	} else {
		p = kmem_cache_alloc(global.slab_priorities, GFP_ATOMIC);
		/* Convert an allocation failure to a priority bump */
		if (unlikely(!p)) {
			prio = I915_PRIORITY_NORMAL; /* recurses just once */

			/* To maintain ordering with all rendering, after an
			 * allocation failure we have to disable all scheduling.
			 * Requests will then be executed in fifo, and schedule
			 * will ensure that dependencies are emitted in fifo.
			 * There will still be some reordering with existing
			 * requests, so if userspace lied about their
			 * dependencies that reordering may be visible.
			 */
			execlists->no_priolist = true;
			goto find_priolist;
		}
	}

	p->priority = prio;
	for (i = 0; i < ARRAY_SIZE(p->requests); i++)
		INIT_LIST_HEAD(&p->requests[i]);
	rb_link_node(&p->node, rb, parent);
	rb_insert_color_cached(&p->node, &execlists->queue, first);
	p->used = 0;

out:
	p->used |= BIT(idx);
	return &p->requests[idx];
}

void __i915_priolist_free(struct i915_priolist *p)
{
	kmem_cache_free(global.slab_priorities, p);
}

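/*
 * Scratch state for the priority-bump walk in __i915_schedule(): the priolist
 * returned by i915_sched_lookup_priolist() is only valid while we hold the
 * same engine->active.lock, so sched_lock_engine() clears the cache whenever
 * it has to switch engine locks.
 */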
struct sched_cache {
	struct list_head *priolist;
};

static struct intel_engine_cs *
sched_lock_engine(const struct i915_sched_node *node,
		  struct intel_engine_cs *locked,
		  struct sched_cache *cache)
{
	const struct i915_request *rq = node_to_request(node);
	struct intel_engine_cs *engine;

	GEM_BUG_ON(!locked);

	/*
	 * Virtual engines complicate acquiring the engine timeline lock,
	 * as their rq->engine pointer is not stable until under that
	 * engine lock. The simple ploy we use is to take the lock then
	 * check that the rq still belongs to the newly locked engine.
	 */
	while (locked != (engine = READ_ONCE(rq->engine))) {
		spin_unlock(&locked->active.lock);
		memset(cache, 0, sizeof(*cache));
		spin_lock(&engine->active.lock);
		locked = engine;
	}

	GEM_BUG_ON(locked != engine);
	return locked;
}

static inline int rq_prio(const struct i915_request *rq)
{
	return rq->sched.attr.priority | __NO_PREEMPTION;
}

static void kick_submission(struct intel_engine_cs *engine, int prio)
{
	const struct i915_request *inflight =
		port_request(engine->execlists.port);

	/*
	 * If we are already the currently executing context, don't
	 * bother evaluating if we should preempt ourselves, or if
	 * we expect nothing to change as a result of running the
	 * tasklet, i.e. we have not changed the priority queue
	 * sufficiently to oust the running context.
	 */
	if (!inflight || !i915_scheduler_need_preempt(prio, rq_prio(inflight)))
		return;

	tasklet_hi_schedule(&engine->execlists.tasklet);
}

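/*
 * Apply the priority in @attr to @node and propagate it to every signaler
 * (dependency) in its chain, so that a request never waits on work of lower
 * priority than itself. Runnable requests are moved to the matching priolist
 * and, if the new priority warrants it, the submission tasklet is kicked.
 * Caller must hold schedule_lock.
 */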
static void __i915_schedule(struct i915_sched_node *node,
			    const struct i915_sched_attr *attr)
{
	struct intel_engine_cs *engine;
	struct i915_dependency *dep, *p;
	struct i915_dependency stack;
	const int prio = attr->priority;
	struct sched_cache cache;
	LIST_HEAD(dfs);

	/* Needed in order to use the temporary link inside i915_dependency */
	lockdep_assert_held(&schedule_lock);
	GEM_BUG_ON(prio == I915_PRIORITY_INVALID);

	if (prio <= READ_ONCE(node->attr.priority))
		return;

	if (node_signaled(node))
		return;

	stack.signaler = node;
	list_add(&stack.dfs_link, &dfs);

	/*
	 * Recursively bump all dependent priorities to match the new request.
	 *
	 * A naive approach would be to use recursion:
	 * static void update_priorities(struct i915_sched_node *node, prio) {
	 *	list_for_each_entry(dep, &node->signalers_list, signal_link)
	 *		update_priorities(dep->signal, prio)
	 *	queue_request(node);
	 * }
	 * but that may have unlimited recursion depth and so runs a very
	 * real risk of overrunning the kernel stack. Instead, we build
	 * a flat list of all dependencies starting with the current request.
	 * As we walk the list of dependencies, we add all of its dependencies
	 * to the end of the list (this may include an already visited
	 * request) and continue to walk onwards onto the new dependencies. The
	 * end result is a topological list of requests in reverse order, the
	 * last element in the list is the request we must execute first.
	 */
	list_for_each_entry(dep, &dfs, dfs_link) {
		struct i915_sched_node *node = dep->signaler;

		/* If we are already flying, we know we have no signalers */
		if (node_started(node))
			continue;

		/*
		 * Within an engine, there can be no cycle, but we may
		 * refer to the same dependency chain multiple times
		 * (redundant dependencies are not eliminated) and across
		 * engines.
		 */
		list_for_each_entry(p, &node->signalers_list, signal_link) {
			GEM_BUG_ON(p == dep); /* no cycles! */

			if (node_signaled(p->signaler))
				continue;

			if (prio > READ_ONCE(p->signaler->attr.priority))
				list_move_tail(&p->dfs_link, &dfs);
		}
	}

	/*
	 * If we didn't need to bump any existing priorities, and we haven't
	 * yet submitted this request (i.e. there is no potential race with
	 * execlists_submit_request()), we can set our own priority and skip
	 * acquiring the engine locks.
	 */
	if (node->attr.priority == I915_PRIORITY_INVALID) {
		GEM_BUG_ON(!list_empty(&node->link));
		node->attr = *attr;

		if (stack.dfs_link.next == stack.dfs_link.prev)
			return;

		__list_del_entry(&stack.dfs_link);
	}

	memset(&cache, 0, sizeof(cache));
	engine = node_to_request(node)->engine;
	spin_lock(&engine->active.lock);

	/* Fifo and depth-first replacement ensure our deps execute before us */
	engine = sched_lock_engine(node, engine, &cache);
	list_for_each_entry_safe_reverse(dep, p, &dfs, dfs_link) {
		INIT_LIST_HEAD(&dep->dfs_link);

		node = dep->signaler;
		engine = sched_lock_engine(node, engine, &cache);
		lockdep_assert_held(&engine->active.lock);

		/* Recheck after acquiring the engine->active.lock */
		if (prio <= node->attr.priority || node_signaled(node))
			continue;

		GEM_BUG_ON(node_to_request(node)->engine != engine);

		node->attr.priority = prio;

		if (list_empty(&node->link)) {
			/*
			 * If the request is not in the priolist queue because
			 * it is not yet runnable, then it doesn't contribute
			 * to our preemption decisions. On the other hand,
			 * if the request is on the HW, it too is not in the
			 * queue; but in that case we may still need to reorder
			 * the inflight requests.
			 */
			continue;
		}

		if (!intel_engine_is_virtual(engine) &&
		    !i915_request_is_active(node_to_request(node))) {
			if (!cache.priolist)
				cache.priolist =
					i915_sched_lookup_priolist(engine,
								   prio);
			list_move_tail(&node->link, cache.priolist);
		}

		if (prio <= engine->execlists.queue_priority_hint)
			continue;

		engine->execlists.queue_priority_hint = prio;

		/* Defer (tasklet) submission until after all of our updates. */
		kick_submission(engine, prio);
	}

	spin_unlock(&engine->active.lock);
}

void i915_schedule(struct i915_request *rq, const struct i915_sched_attr *attr)
{
	spin_lock_irq(&schedule_lock);
	__i915_schedule(&rq->sched, attr);
	spin_unlock_irq(&schedule_lock);
}

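/*
 * OR extra sub-priority bits (e.g. __NO_PREEMPTION for external dependencies)
 * into @node's current attributes and rerun the scheduler.
 */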
static void __bump_priority(struct i915_sched_node *node, unsigned int bump)
{
	struct i915_sched_attr attr = node->attr;

	attr.priority |= bump;
	__i915_schedule(node, &attr);
}

void i915_schedule_bump_priority(struct i915_request *rq, unsigned int bump)
{
	unsigned long flags;

	GEM_BUG_ON(bump & ~I915_PRIORITY_MASK);

	if (READ_ONCE(rq->sched.attr.priority) == I915_PRIORITY_INVALID)
		return;

	spin_lock_irqsave(&schedule_lock, flags);
	__bump_priority(&rq->sched, bump);
	spin_unlock_irqrestore(&schedule_lock, flags);
}

void i915_sched_node_init(struct i915_sched_node *node)
{
	INIT_LIST_HEAD(&node->signalers_list);
	INIT_LIST_HEAD(&node->waiters_list);
	INIT_LIST_HEAD(&node->link);
	node->attr.priority = I915_PRIORITY_INVALID;
	node->semaphores = 0;
	node->flags = 0;
}

static struct i915_dependency *
i915_dependency_alloc(void)
{
	return kmem_cache_alloc(global.slab_dependencies, GFP_KERNEL);
}

static void
i915_dependency_free(struct i915_dependency *dep)
{
	kmem_cache_free(global.slab_dependencies, dep);
}

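/*
 * Register @node as a waiter upon @signal using the caller-provided @dep.
 * Returns false if @signal has already been signaled, in which case @dep is
 * left unused and may be freed by the caller.
 */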
bool __i915_sched_node_add_dependency(struct i915_sched_node *node,
				      struct i915_sched_node *signal,
				      struct i915_dependency *dep,
				      unsigned long flags)
{
	bool ret = false;

	spin_lock_irq(&schedule_lock);

	if (!node_signaled(signal)) {
		INIT_LIST_HEAD(&dep->dfs_link);
		list_add(&dep->wait_link, &signal->waiters_list);
		list_add(&dep->signal_link, &node->signalers_list);
		dep->signaler = signal;
		dep->flags = flags;

		/* Keep track of whether anyone on this chain has a semaphore */
		if (signal->flags & I915_SCHED_HAS_SEMAPHORE_CHAIN &&
		    !node_started(signal))
			node->flags |= I915_SCHED_HAS_SEMAPHORE_CHAIN;

		/*
		 * As we do not allow WAIT to preempt inflight requests,
		 * once we have executed a request, along with triggering
		 * any execution callbacks, we must preserve its ordering
		 * within the non-preemptible FIFO.
		 */
		BUILD_BUG_ON(__NO_PREEMPTION & ~I915_PRIORITY_MASK);
		if (flags & I915_DEPENDENCY_EXTERNAL)
			__bump_priority(signal, __NO_PREEMPTION);

		ret = true;
	}

	spin_unlock_irq(&schedule_lock);

	return ret;
}

int i915_sched_node_add_dependency(struct i915_sched_node *node,
				   struct i915_sched_node *signal)
{
	struct i915_dependency *dep;

	dep = i915_dependency_alloc();
	if (!dep)
		return -ENOMEM;

	if (!__i915_sched_node_add_dependency(node, signal, dep,
					      I915_DEPENDENCY_EXTERNAL |
					      I915_DEPENDENCY_ALLOC))
		i915_dependency_free(dep);

	return 0;
}

void i915_sched_node_fini(struct i915_sched_node *node)
{
	struct i915_dependency *dep, *tmp;

	spin_lock_irq(&schedule_lock);

	/*
	 * Everyone we depended upon (the fences we wait to be signaled)
	 * should retire before us and remove themselves from our list.
	 * However, retirement is run independently on each timeline and
	 * so we may be called out-of-order.
	 */
	list_for_each_entry_safe(dep, tmp, &node->signalers_list, signal_link) {
		GEM_BUG_ON(!node_signaled(dep->signaler));
		GEM_BUG_ON(!list_empty(&dep->dfs_link));

		list_del(&dep->wait_link);
		if (dep->flags & I915_DEPENDENCY_ALLOC)
			i915_dependency_free(dep);
	}

	/* Remove ourselves from everyone who depends upon us */
	list_for_each_entry_safe(dep, tmp, &node->waiters_list, wait_link) {
		GEM_BUG_ON(dep->signaler != node);
		GEM_BUG_ON(!list_empty(&dep->dfs_link));

		list_del(&dep->signal_link);
		if (dep->flags & I915_DEPENDENCY_ALLOC)
			i915_dependency_free(dep);
	}

	spin_unlock_irq(&schedule_lock);
}

static void i915_global_scheduler_shrink(void)
{
	kmem_cache_shrink(global.slab_dependencies);
	kmem_cache_shrink(global.slab_priorities);
}

static void i915_global_scheduler_exit(void)
{
	kmem_cache_destroy(global.slab_dependencies);
	kmem_cache_destroy(global.slab_priorities);
}

static struct i915_global_scheduler global = { {
	.shrink = i915_global_scheduler_shrink,
	.exit = i915_global_scheduler_exit,
} };

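/*
 * Create the slab caches backing i915_dependency and i915_priolist
 * allocations and register their shrink/exit callbacks with i915_globals.
 */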
int __init i915_global_scheduler_init(void)
{
	global.slab_dependencies = KMEM_CACHE(i915_dependency,
					      SLAB_HWCACHE_ALIGN);
	if (!global.slab_dependencies)
		return -ENOMEM;

	global.slab_priorities = KMEM_CACHE(i915_priolist,
					    SLAB_HWCACHE_ALIGN);
	if (!global.slab_priorities)
		goto err_priorities;

	i915_global_register(&global.base);
	return 0;

err_priorities:
	/* Don't leak the dependency cache if the second allocation fails */
	kmem_cache_destroy(global.slab_dependencies);
	return -ENOMEM;
}