drm/i915: Group the irq breadcrumb variables into the same cacheline
[linux-2.6-block.git] drivers/gpu/drm/i915/intel_breadcrumbs.c
/*
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 */

#include <linux/kthread.h>

#include "i915_drv.h"

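/* Sketch of the intended waiter flow (the real loop lives with the request
 * wait code; this is illustrative only and assumes the intel_wait_init()
 * helper declared alongside struct intel_wait):
 *
 *	struct intel_wait wait;
 *
 *	intel_wait_init(&wait, seqno);
 *	if (intel_engine_add_wait(engine, &wait))
 *		(we are now the bottom-half: do the coherent seqno check)
 *	for (;;) {
 *		set_current_state(TASK_UNINTERRUPTIBLE);
 *		if (i915_seqno_passed(intel_engine_get_seqno(engine), seqno))
 *			break;
 *		schedule();	(woken by the irq bottom-half or fake irq)
 *	}
 *	__set_current_state(TASK_RUNNING);
 *	intel_engine_remove_wait(engine, &wait);
 */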
static void intel_breadcrumbs_fake_irq(unsigned long data)
{
	struct intel_engine_cs *engine = (struct intel_engine_cs *)data;

	/*
	 * The timer persists in case we cannot enable interrupts,
	 * or if we have previously seen seqno/interrupt incoherency
	 * ("missed interrupt" syndrome). Here the worker will wake up
	 * every jiffie in order to kick the oldest waiter to do the
	 * coherent seqno check.
	 */
	rcu_read_lock();
	if (intel_engine_wakeup(engine))
		mod_timer(&engine->breadcrumbs.fake_irq, jiffies + 1);
	rcu_read_unlock();
}

static void irq_enable(struct intel_engine_cs *engine)
{
	/* Enabling the IRQ may miss the generation of the interrupt, but
	 * we still need to force the barrier before reading the seqno,
	 * just in case.
	 */
	engine->breadcrumbs.irq_posted = true;

	spin_lock_irq(&engine->i915->irq_lock);
	engine->irq_enable(engine);
	spin_unlock_irq(&engine->i915->irq_lock);
}

static void irq_disable(struct intel_engine_cs *engine)
{
	spin_lock_irq(&engine->i915->irq_lock);
	engine->irq_disable(engine);
	spin_unlock_irq(&engine->i915->irq_lock);

	engine->breadcrumbs.irq_posted = false;
}

static void __intel_breadcrumbs_enable_irq(struct intel_breadcrumbs *b)
{
	struct intel_engine_cs *engine =
		container_of(b, struct intel_engine_cs, breadcrumbs);
	struct drm_i915_private *i915 = engine->i915;

	assert_spin_locked(&b->lock);
	if (b->rpm_wakelock)
		return;

	/* Since we are waiting on a request, the GPU should be busy
	 * and should have its own rpm reference. For completeness,
	 * record an rpm reference for ourselves to cover the
	 * interrupt we unmask.
	 */
	intel_runtime_pm_get_noresume(i915);
	b->rpm_wakelock = true;

	/* No interrupts? Kick the waiter every jiffie! */
	if (intel_irqs_enabled(i915)) {
		if (!test_bit(engine->id, &i915->gpu_error.test_irq_rings))
			irq_enable(engine);
		b->irq_enabled = true;
	}

	if (!b->irq_enabled ||
	    test_bit(engine->id, &i915->gpu_error.missed_irq_rings))
		mod_timer(&b->fake_irq, jiffies + 1);
}

static void __intel_breadcrumbs_disable_irq(struct intel_breadcrumbs *b)
{
	struct intel_engine_cs *engine =
		container_of(b, struct intel_engine_cs, breadcrumbs);

	assert_spin_locked(&b->lock);
	if (!b->rpm_wakelock)
		return;

	if (b->irq_enabled) {
		irq_disable(engine);
		b->irq_enabled = false;
	}

	intel_runtime_pm_put(engine->i915);
	b->rpm_wakelock = false;
}

static inline struct intel_wait *to_wait(struct rb_node *node)
{
	return container_of(node, struct intel_wait, node);
}

static inline void __intel_breadcrumbs_finish(struct intel_breadcrumbs *b,
					      struct intel_wait *wait)
{
	assert_spin_locked(&b->lock);

	/* This request is completed, so remove it from the tree, mark it as
	 * complete, and *then* wake up the associated task.
	 */
	rb_erase(&wait->node, &b->waiters);
	RB_CLEAR_NODE(&wait->node);

	wake_up_process(wait->tsk); /* implicit smp_wmb() */
}

static bool __intel_engine_add_wait(struct intel_engine_cs *engine,
				    struct intel_wait *wait)
{
	struct intel_breadcrumbs *b = &engine->breadcrumbs;
	struct rb_node **p, *parent, *completed;
	bool first;
	u32 seqno;

	/* Insert the request into the retirement ordered list
	 * of waiters by walking the rbtree. If we are the oldest
	 * seqno in the tree (the first to be retired), then
	 * set ourselves as the bottom-half.
	 *
	 * As we descend the tree, prune completed branches: since we hold
	 * the spinlock, we know that the first_waiter must be delayed, so we
	 * can reduce some of the sequential wake-up latency by taking action
	 * ourselves and waking up the completed tasks in parallel. Also, by
	 * removing stale elements from the tree, we may reduce the
	 * ping-pong between the old bottom-half and ourselves as first-waiter.
	 */
	first = true;
	parent = NULL;
	completed = NULL;
	seqno = intel_engine_get_seqno(engine);

	/* If the request completed before we managed to grab the spinlock,
	 * return now before adding ourselves to the rbtree. We let the
	 * current bottom-half handle any pending wakeups and instead
	 * try and get out of the way quickly.
	 */
	if (i915_seqno_passed(seqno, wait->seqno)) {
		RB_CLEAR_NODE(&wait->node);
		return first;
	}

	p = &b->waiters.rb_node;
	while (*p) {
		parent = *p;
		if (wait->seqno == to_wait(parent)->seqno) {
			/* We have multiple waiters on the same seqno, select
			 * the highest priority task (that with the smallest
			 * task->prio) to serve as the bottom-half for this
			 * group.
			 */
			if (wait->tsk->prio > to_wait(parent)->tsk->prio) {
				p = &parent->rb_right;
				first = false;
			} else {
				p = &parent->rb_left;
			}
		} else if (i915_seqno_passed(wait->seqno,
					     to_wait(parent)->seqno)) {
			p = &parent->rb_right;
			if (i915_seqno_passed(seqno, to_wait(parent)->seqno))
				completed = parent;
			else
				first = false;
		} else {
			p = &parent->rb_left;
		}
	}
	rb_link_node(&wait->node, parent, p);
	rb_insert_color(&wait->node, &b->waiters);
	GEM_BUG_ON(!first && !b->irq_seqno_bh);

	if (completed) {
		struct rb_node *next = rb_next(completed);

		GEM_BUG_ON(!next && !first);
		if (next && next != &wait->node) {
			GEM_BUG_ON(first);
			b->first_wait = to_wait(next);
			smp_store_mb(b->irq_seqno_bh, b->first_wait->tsk);
			/* As there is a delay between reading the current
			 * seqno, processing the completed tasks and selecting
			 * the next waiter, we may have missed the interrupt
			 * and so need the next bottom-half to wake up.
			 *
			 * Also, as we enable the IRQ, we may miss the
			 * interrupt for that seqno, so we have to wake up
			 * the next bottom-half in order to do a coherent check
			 * in case the seqno has already passed.
			 */
			__intel_breadcrumbs_enable_irq(b);
			if (READ_ONCE(b->irq_posted))
				wake_up_process(to_wait(next)->tsk);
		}

		do {
			struct intel_wait *crumb = to_wait(completed);
			completed = rb_prev(completed);
			__intel_breadcrumbs_finish(b, crumb);
		} while (completed);
	}

	if (first) {
		GEM_BUG_ON(rb_first(&b->waiters) != &wait->node);
		b->first_wait = wait;
		smp_store_mb(b->irq_seqno_bh, wait->tsk);
		/* After assigning ourselves as the new bottom-half, we must
		 * perform a cursory check to prevent a missed interrupt.
		 * Either we miss the interrupt whilst programming the
		 * hardware, or, if there was a previous waiter (for a later
		 * seqno), they may be woken instead of us (due to the
		 * inherent race in the unlocked read of b->irq_seqno_bh in
		 * the irq handler) and so we miss the wake-up.
		 */
		__intel_breadcrumbs_enable_irq(b);
	}
	GEM_BUG_ON(!b->irq_seqno_bh);
	GEM_BUG_ON(!b->first_wait);
	GEM_BUG_ON(rb_first(&b->waiters) != &b->first_wait->node);

	return first;
}

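/* Add a waiter into the bottom-half rbtree under the breadcrumbs spinlock.
 * Returns true if this waiter is now the first (oldest) waiter and is
 * therefore the bottom-half responsible for performing the coherent seqno
 * check and waking the other waiters. (It also returns true if the request
 * had already completed before the wait could be inserted.)
 */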
bool intel_engine_add_wait(struct intel_engine_cs *engine,
			   struct intel_wait *wait)
{
	struct intel_breadcrumbs *b = &engine->breadcrumbs;
	bool first;

	spin_lock(&b->lock);
	first = __intel_engine_add_wait(engine, wait);
	spin_unlock(&b->lock);

	return first;
}

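/* Arm the fake-irq timer so that the oldest waiter is kicked to perform a
 * coherent seqno check on the next jiffie, typically when the caller
 * suspects that user interrupts are not being delivered.
 */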
void intel_engine_enable_fake_irq(struct intel_engine_cs *engine)
{
	mod_timer(&engine->breadcrumbs.fake_irq, jiffies + 1);
}

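/* Wake the next waiter in the chain only if it is at least as important as
 * the task doing the removal (i.e. has an equal or smaller prio value);
 * waking a queue of lower priority waiters would only add latency for the
 * more important task.
 */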
static inline bool chain_wakeup(struct rb_node *rb, int priority)
{
	return rb && to_wait(rb)->tsk->prio <= priority;
}

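/* The signaler thread is treated as the most important waiter (INT_MIN) so
 * that it never pays the cost of waking a chain of completed waiters and
 * can return to delivering signals as quickly as possible.
 */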
static inline int wakeup_priority(struct intel_breadcrumbs *b,
				  struct task_struct *tsk)
{
	if (tsk == b->signaler)
		return INT_MIN;
	else
		return tsk->prio;
}

void intel_engine_remove_wait(struct intel_engine_cs *engine,
			      struct intel_wait *wait)
{
	struct intel_breadcrumbs *b = &engine->breadcrumbs;

	/* Quick check to see if this waiter was already decoupled from
	 * the tree by the bottom-half to avoid contention on the spinlock
	 * by the herd.
	 */
	if (RB_EMPTY_NODE(&wait->node))
		return;

	spin_lock(&b->lock);

	if (RB_EMPTY_NODE(&wait->node))
		goto out_unlock;

	if (b->first_wait == wait) {
		const int priority = wakeup_priority(b, wait->tsk);
		struct rb_node *next;

		GEM_BUG_ON(b->irq_seqno_bh != wait->tsk);

		/* We are the current bottom-half. Find the next candidate,
		 * the first waiter in the queue on the remaining oldest
		 * request. As multiple seqnos may complete in the time it
		 * takes us to wake up and find the next waiter, we have to
		 * wake up that waiter for it to perform its own coherent
		 * completion check.
		 */
		next = rb_next(&wait->node);
		if (chain_wakeup(next, priority)) {
			/* If the next waiter is already complete,
			 * wake it up and continue onto the next waiter. So
			 * if we have a small herd, they will wake up in
			 * parallel rather than sequentially, which should
			 * reduce the overall latency in waking all the
			 * completed clients.
			 *
			 * However, waking up a chain adds extra latency to
			 * the first_waiter. This is undesirable if that
			 * waiter is a high priority task.
			 */
			u32 seqno = intel_engine_get_seqno(engine);

			while (i915_seqno_passed(seqno, to_wait(next)->seqno)) {
				struct rb_node *n = rb_next(next);

				__intel_breadcrumbs_finish(b, to_wait(next));
				next = n;
				if (!chain_wakeup(next, priority))
					break;
			}
		}

		if (next) {
			/* In our haste, we may have completed the first waiter
			 * before we enabled the interrupt. Do so now as we
			 * have a second waiter for a future seqno. Afterwards,
			 * we have to wake up that waiter in case we missed
			 * the interrupt, or if we have to handle an
			 * exception rather than a seqno completion.
			 */
			b->first_wait = to_wait(next);
			smp_store_mb(b->irq_seqno_bh, b->first_wait->tsk);
			if (b->first_wait->seqno != wait->seqno)
				__intel_breadcrumbs_enable_irq(b);
			wake_up_process(b->irq_seqno_bh);
		} else {
			b->first_wait = NULL;
			WRITE_ONCE(b->irq_seqno_bh, NULL);
			__intel_breadcrumbs_disable_irq(b);
		}
	} else {
		GEM_BUG_ON(rb_first(&b->waiters) == &wait->node);
	}

	GEM_BUG_ON(RB_EMPTY_NODE(&wait->node));
	rb_erase(&wait->node, &b->waiters);

out_unlock:
	GEM_BUG_ON(b->first_wait == wait);
	GEM_BUG_ON(rb_first(&b->waiters) !=
		   (b->first_wait ? &b->first_wait->node : NULL));
	GEM_BUG_ON(!b->irq_seqno_bh ^ RB_EMPTY_ROOT(&b->waiters));
	spin_unlock(&b->lock);
}

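/* Check whether the request tracked by the signaler is done: either another
 * bottom-half has already completed its wait, or the GPU has advanced past
 * its seqno (or hung).
 */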
static bool signal_complete(struct drm_i915_gem_request *request)
{
	if (!request)
		return false;

	/* If another process served as the bottom-half it may have already
	 * signalled that this wait is already completed.
	 */
	if (intel_wait_complete(&request->signaling.wait))
		return true;

	/* Carefully check if the request is complete, giving time for the
	 * seqno to be visible or if the GPU hung.
	 */
	if (__i915_request_irq_complete(request))
		return true;

	return false;
}

static struct drm_i915_gem_request *to_signaler(struct rb_node *rb)
{
	return container_of(rb, struct drm_i915_gem_request, signaling.node);
}

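/* Run the signaler as a realtime (SCHED_FIFO) task so that completed
 * requests are signalled with minimal latency, ahead of ordinary tasks.
 */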
static void signaler_set_rtpriority(void)
{
	struct sched_param param = { .sched_priority = 1 };

	sched_setscheduler_nocheck(current, SCHED_FIFO, &param);
}

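/* Main loop of the per-engine signaler kthread: wait for the oldest tracked
 * request to complete, then remove its waiter, drop its reference and move
 * on to the next oldest signal.
 */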
static int intel_breadcrumbs_signaler(void *arg)
{
	struct intel_engine_cs *engine = arg;
	struct intel_breadcrumbs *b = &engine->breadcrumbs;
	struct drm_i915_gem_request *request;

	/* Install ourselves with high priority to reduce signalling latency */
	signaler_set_rtpriority();

	do {
		set_current_state(TASK_INTERRUPTIBLE);

		/* We are either woken up by the interrupt bottom-half,
		 * or by a client adding a new signaller. In both cases,
		 * the GPU seqno may have advanced beyond our oldest signal.
		 * If it has, propagate the signal, remove the waiter and
		 * check again with the next oldest signal. Otherwise we
		 * need to wait for a new interrupt from the GPU or for
		 * a new client.
		 */
		request = READ_ONCE(b->first_signal);
		if (signal_complete(request)) {
			/* Wake up all other completed waiters and select the
			 * next bottom-half for the next user interrupt.
			 */
			intel_engine_remove_wait(engine,
						 &request->signaling.wait);

			/* Find the next oldest signal. Note that as we have
			 * not been holding the lock, another client may
			 * have installed an even older signal than the one
			 * we just completed - so double check we are still
			 * the oldest before picking the next one.
			 */
			spin_lock(&b->lock);
			if (request == b->first_signal) {
				struct rb_node *rb =
					rb_next(&request->signaling.node);
				b->first_signal = rb ? to_signaler(rb) : NULL;
			}
			rb_erase(&request->signaling.node, &b->signals);
			spin_unlock(&b->lock);

			i915_gem_request_unreference(request);
		} else {
			if (kthread_should_stop())
				break;

			schedule();
		}
	} while (1);
	__set_current_state(TASK_RUNNING);

	return 0;
}

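/* Arrange for the signaler thread to notice this request and signal it once
 * the GPU has completed its seqno. Safe to call more than once; only the
 * first call installs the signal.
 */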
void intel_engine_enable_signaling(struct drm_i915_gem_request *request)
{
	struct intel_engine_cs *engine = request->engine;
	struct intel_breadcrumbs *b = &engine->breadcrumbs;
	struct rb_node *parent, **p;
	bool first, wakeup;

	if (unlikely(READ_ONCE(request->signaling.wait.tsk)))
		return;

	spin_lock(&b->lock);
	if (unlikely(request->signaling.wait.tsk)) {
		wakeup = false;
		goto unlock;
	}

	request->signaling.wait.tsk = b->signaler;
	request->signaling.wait.seqno = request->seqno;
	i915_gem_request_reference(request);

	/* First add ourselves into the list of waiters, but register our
	 * bottom-half as the signaller thread. As per usual, only the oldest
	 * waiter (not just signaller) is tasked as the bottom-half waking
	 * up all completed waiters after the user interrupt.
	 *
	 * If we are the oldest waiter, enable the irq (after which we
	 * must double check that the seqno did not complete).
	 */
	wakeup = __intel_engine_add_wait(engine, &request->signaling.wait);

	/* Now insert ourselves into the retirement ordered list of signals
	 * on this engine. We track the oldest seqno as that will be the
	 * first signal to complete.
	 */
	parent = NULL;
	first = true;
	p = &b->signals.rb_node;
	while (*p) {
		parent = *p;
		if (i915_seqno_passed(request->seqno,
				      to_signaler(parent)->seqno)) {
			p = &parent->rb_right;
			first = false;
		} else {
			p = &parent->rb_left;
		}
	}
	rb_link_node(&request->signaling.node, parent, p);
	rb_insert_color(&request->signaling.node, &b->signals);
	if (first)
		smp_store_mb(b->first_signal, request);

unlock:
	spin_unlock(&b->lock);

	if (wakeup)
		wake_up_process(b->signaler);
}

int intel_engine_init_breadcrumbs(struct intel_engine_cs *engine)
{
	struct intel_breadcrumbs *b = &engine->breadcrumbs;
	struct task_struct *tsk;

	spin_lock_init(&b->lock);
	setup_timer(&b->fake_irq,
		    intel_breadcrumbs_fake_irq,
		    (unsigned long)engine);

	/* Spawn a thread to provide a common bottom-half for all signals.
	 * As this is an asynchronous interface we cannot steal the current
	 * task for handling the bottom-half to the user interrupt, therefore
	 * we create a thread to do the coherent seqno dance after the
	 * interrupt and then signal the waitqueue (via the dma-buf/fence).
	 */
	tsk = kthread_run(intel_breadcrumbs_signaler, engine,
			  "i915/signal:%d", engine->id);
	if (IS_ERR(tsk))
		return PTR_ERR(tsk);

	b->signaler = tsk;

	return 0;
}

void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine)
{
	struct intel_breadcrumbs *b = &engine->breadcrumbs;

	if (!IS_ERR_OR_NULL(b->signaler))
		kthread_stop(b->signaler);

	del_timer_sync(&b->fake_irq);
}

unsigned int intel_kick_waiters(struct drm_i915_private *i915)
{
	struct intel_engine_cs *engine;
	unsigned int mask = 0;

	/* To avoid the task_struct disappearing beneath us as we wake up
	 * the process, we must first inspect the task_struct->state under the
	 * RCU lock, i.e. as we call wake_up_process() we must be holding the
	 * rcu_read_lock().
	 */
	rcu_read_lock();
	for_each_engine(engine, i915)
		if (unlikely(intel_engine_wakeup(engine)))
			mask |= intel_engine_flag(engine);
	rcu_read_unlock();

	return mask;
}

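/* Wake every engine's signaler thread that still has a signal pending,
 * returning a mask of the engines that were kicked.
 */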
unsigned int intel_kick_signalers(struct drm_i915_private *i915)
{
	struct intel_engine_cs *engine;
	unsigned int mask = 0;

	for_each_engine(engine, i915) {
		if (unlikely(READ_ONCE(engine->breadcrumbs.first_signal))) {
			wake_up_process(engine->breadcrumbs.signaler);
			mask |= intel_engine_flag(engine);
		}
	}

	return mask;
}