/*
 * Copyright © 2008-2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 */

#include <linux/dma-fence-array.h>
#include <linux/irq_work.h>
#include <linux/prefetch.h>
#include <linux/sched.h>
#include <linux/sched/clock.h>
#include <linux/sched/signal.h>

#include "gem/i915_gem_context.h"
#include "gt/intel_context.h"
#include "gt/intel_ring.h"

#include "i915_active.h"
#include "i915_drv.h"
#include "i915_globals.h"
#include "i915_trace.h"
#include "intel_pm.h"
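/*
 * An execute_cb is attached to a signaling request's execute_cb list and is
 * run (from irq_work context) once that request is marked as active on HW,
 * completing the waiter's submit fence and optionally invoking a
 * caller-supplied hook.
 */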
struct execute_cb {
	struct list_head link;
	struct irq_work work;
	struct i915_sw_fence *fence;
	void (*hook)(struct i915_request *rq, struct dma_fence *signal);
	struct i915_request *signal;
};

static struct i915_global_request {
	struct i915_global base;
	struct kmem_cache *slab_requests;
	struct kmem_cache *slab_dependencies;
	struct kmem_cache *slab_execute_cbs;
} global;

static const char *i915_fence_get_driver_name(struct dma_fence *fence)
{
	return "i915";
}

static const char *i915_fence_get_timeline_name(struct dma_fence *fence)
{
	/*
	 * The timeline struct (as part of the ppgtt underneath a context)
	 * may be freed when the request is no longer in use by the GPU.
	 * We could extend the life of a context to beyond that of all
	 * fences, possibly keeping the hw resource around indefinitely,
	 * or we just give them a false name. Since
	 * dma_fence_ops.get_timeline_name is a debug feature, the occasional
	 * lie seems justifiable.
	 */
	if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags))
		return "signaled";

	return to_request(fence)->gem_context->name ?: "[i915]";
}

static bool i915_fence_signaled(struct dma_fence *fence)
{
	return i915_request_completed(to_request(fence));
}

static bool i915_fence_enable_signaling(struct dma_fence *fence)
{
	return i915_request_enable_breadcrumb(to_request(fence));
}

static signed long i915_fence_wait(struct dma_fence *fence,
				   bool interruptible,
				   signed long timeout)
{
	return i915_request_wait(to_request(fence),
				 interruptible | I915_WAIT_PRIORITY,
				 timeout);
}

static void i915_fence_release(struct dma_fence *fence)
{
	struct i915_request *rq = to_request(fence);

	/*
	 * The request is put onto a RCU freelist (i.e. the address
	 * is immediately reused), mark the fences as being freed now.
	 * Otherwise the debugobjects for the fences are only marked as
	 * freed when the slab cache itself is freed, and so we would get
	 * caught trying to reuse dead objects.
	 */
	i915_sw_fence_fini(&rq->submit);
	i915_sw_fence_fini(&rq->semaphore);

	kmem_cache_free(global.slab_requests, rq);
}

const struct dma_fence_ops i915_fence_ops = {
	.get_driver_name = i915_fence_get_driver_name,
	.get_timeline_name = i915_fence_get_timeline_name,
	.enable_signaling = i915_fence_enable_signaling,
	.signaled = i915_fence_signaled,
	.wait = i915_fence_wait,
	.release = i915_fence_release,
};

static void irq_execute_cb(struct irq_work *wrk)
{
	struct execute_cb *cb = container_of(wrk, typeof(*cb), work);

	i915_sw_fence_complete(cb->fence);
	kmem_cache_free(global.slab_execute_cbs, cb);
}

static void irq_execute_cb_hook(struct irq_work *wrk)
{
	struct execute_cb *cb = container_of(wrk, typeof(*cb), work);

	cb->hook(container_of(cb->fence, struct i915_request, submit),
		 &cb->signal->fence);
	i915_request_put(cb->signal);

	irq_execute_cb(wrk);
}
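/*
 * Queue the irq_work for every execute_cb attached to this request; called
 * under rq->lock once the request has been marked as active.
 */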
static void __notify_execute_cb(struct i915_request *rq)
{
	struct execute_cb *cb;

	lockdep_assert_held(&rq->lock);

	if (list_empty(&rq->execute_cb))
		return;

	list_for_each_entry(cb, &rq->execute_cb, link)
		irq_work_queue(&cb->work);

	/*
	 * XXX Rollback on __i915_request_unsubmit()
	 *
	 * In the future, perhaps when we have an active time-slicing scheduler,
	 * it will be interesting to unsubmit parallel execution and remove
	 * busywaits from the GPU until their master is restarted. This is
	 * quite hairy, we have to carefully rollback the fence and do a
	 * preempt-to-idle cycle on the target engine, all the while the
	 * master execute_cb may refire.
	 */
	INIT_LIST_HEAD(&rq->execute_cb);
}
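/*
 * Detach the request from its client's list of outstanding requests. The
 * file_priv may be torn down concurrently, hence the RCU protection around
 * the xchg() and list removal.
 */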
static inline void
remove_from_client(struct i915_request *request)
{
	struct drm_i915_file_private *file_priv;

	if (!READ_ONCE(request->file_priv))
		return;

	rcu_read_lock();
	file_priv = xchg(&request->file_priv, NULL);
	if (file_priv) {
		spin_lock(&file_priv->mm.lock);
		list_del(&request->client_link);
		spin_unlock(&file_priv->mm.lock);
	}
	rcu_read_unlock();
}

static void free_capture_list(struct i915_request *request)
{
	struct i915_capture_list *capture;

	capture = request->capture_list;
	while (capture) {
		struct i915_capture_list *next = capture->next;

		kfree(capture);
		capture = next;
	}
}

static void remove_from_engine(struct i915_request *rq)
{
	struct intel_engine_cs *engine, *locked;

	/*
	 * Virtual engines complicate acquiring the engine timeline lock,
	 * as their rq->engine pointer is not stable until under that
	 * engine lock. The simple ploy we use is to take the lock then
	 * check that the rq still belongs to the newly locked engine.
	 */
	locked = READ_ONCE(rq->engine);
	spin_lock_irq(&locked->active.lock);
	while (unlikely(locked != (engine = READ_ONCE(rq->engine)))) {
		spin_unlock(&locked->active.lock);
		spin_lock(&engine->active.lock);
		locked = engine;
	}
	list_del(&rq->sched.link);
	spin_unlock_irq(&locked->active.lock);
}
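/*
 * Retire a completed request: signal its fence, drop its breadcrumb and
 * engine/timeline tracking, and release the references it holds. Returns
 * false if the request has not yet completed and so cannot be retired.
 */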
bool i915_request_retire(struct i915_request *rq)
{
	if (!i915_request_completed(rq))
		return false;

	GEM_TRACE("%s fence %llx:%lld, current %d\n",
		  rq->engine->name,
		  rq->fence.context, rq->fence.seqno,
		  hwsp_seqno(rq));

	GEM_BUG_ON(!i915_sw_fence_signaled(&rq->submit));
	trace_i915_request_retire(rq);

	/*
	 * We know the GPU must have read the request to have
	 * sent us the seqno + interrupt, so use the position
	 * of tail of the request to update the last known position
	 * of the GPU head.
	 *
	 * Note this requires that we are always called in request
	 * completion order.
	 */
	GEM_BUG_ON(!list_is_first(&rq->link,
				  &i915_request_timeline(rq)->requests));
	rq->ring->head = rq->postfix;

	/*
	 * We only loosely track inflight requests across preemption,
	 * and so we may find ourselves attempting to retire a _completed_
	 * request that we have removed from the HW and put back on a run
	 * queue.
	 */
	remove_from_engine(rq);

	spin_lock_irq(&rq->lock);
	i915_request_mark_complete(rq);
	if (!i915_request_signaled(rq))
		dma_fence_signal_locked(&rq->fence);
	if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &rq->fence.flags))
		i915_request_cancel_breadcrumb(rq);
	if (i915_request_has_waitboost(rq)) {
		GEM_BUG_ON(!atomic_read(&rq->i915->gt_pm.rps.num_waiters));
		atomic_dec(&rq->i915->gt_pm.rps.num_waiters);
	}
	if (!test_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags)) {
		set_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags);
		__notify_execute_cb(rq);
	}
	GEM_BUG_ON(!list_empty(&rq->execute_cb));
	spin_unlock_irq(&rq->lock);

	remove_from_client(rq);
	list_del(&rq->link);

	intel_context_exit(rq->hw_context);
	intel_context_unpin(rq->hw_context);

	free_capture_list(rq);
	i915_sched_node_fini(&rq->sched);
	i915_request_put(rq);

	return true;
}

void i915_request_retire_upto(struct i915_request *rq)
{
	struct intel_timeline * const tl = i915_request_timeline(rq);
	struct i915_request *tmp;

	GEM_TRACE("%s fence %llx:%lld, current %d\n",
		  rq->engine->name,
		  rq->fence.context, rq->fence.seqno,
		  hwsp_seqno(rq));

	GEM_BUG_ON(!i915_request_completed(rq));

	do {
		tmp = list_first_entry(&tl->requests, typeof(*tmp), link);
	} while (i915_request_retire(tmp) && tmp != rq);
}
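/*
 * Arrange for rq's submit fence to be held back until signal begins executing
 * on HW (or invoke the optional hook at that point). If signal is already
 * active, the wait (or hook) is resolved immediately.
 */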
static int
__i915_request_await_execution(struct i915_request *rq,
			       struct i915_request *signal,
			       void (*hook)(struct i915_request *rq,
					    struct dma_fence *signal),
			       gfp_t gfp)
{
	struct execute_cb *cb;

	if (i915_request_is_active(signal)) {
		if (hook)
			hook(rq, &signal->fence);
		return 0;
	}

	cb = kmem_cache_alloc(global.slab_execute_cbs, gfp);
	if (!cb)
		return -ENOMEM;

	cb->fence = &rq->submit;
	i915_sw_fence_await(cb->fence);
	init_irq_work(&cb->work, irq_execute_cb);

	if (hook) {
		cb->hook = hook;
		cb->signal = i915_request_get(signal);
		cb->work.func = irq_execute_cb_hook;
	}

	spin_lock_irq(&signal->lock);
	if (i915_request_is_active(signal)) {
		if (hook) {
			hook(rq, &signal->fence);
			i915_request_put(signal);
		}
		i915_sw_fence_complete(cb->fence);
		kmem_cache_free(global.slab_execute_cbs, cb);
	} else {
		list_add_tail(&cb->link, &signal->execute_cb);
	}
	spin_unlock_irq(&signal->lock);

	return 0;
}
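/*
 * Move the request onto the engine's list of active requests and emit its
 * final breadcrumb. Returns true if the payload was emitted, false if the
 * request had already completed and only the bookkeeping was transferred.
 */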
bool __i915_request_submit(struct i915_request *request)
{
	struct intel_engine_cs *engine = request->engine;
	bool result = false;

	GEM_TRACE("%s fence %llx:%lld, current %d\n",
		  engine->name,
		  request->fence.context, request->fence.seqno,
		  hwsp_seqno(request));

	GEM_BUG_ON(!irqs_disabled());
	lockdep_assert_held(&engine->active.lock);

	/*
	 * With the advent of preempt-to-busy, we frequently encounter
	 * requests that we have unsubmitted from HW, but left running
	 * until the next ack and so have completed in the meantime. On
	 * resubmission of that completed request, we can skip
	 * updating the payload, and execlists can even skip submitting
	 * the request.
	 *
	 * We must remove the request from the caller's priority queue,
	 * and the caller must only call us when the request is in their
	 * priority queue, under the active.lock. This ensures that the
	 * request has *not* yet been retired and we can safely move
	 * the request into the engine->active.list where it will be
	 * dropped upon retiring. (Otherwise if we resubmit a *retired*
	 * request, this would be a horrible use-after-free.)
	 */
	if (i915_request_completed(request))
		goto xfer;

	if (i915_gem_context_is_banned(request->gem_context))
		i915_request_skip(request, -EIO);

	/*
	 * Are we using semaphores when the gpu is already saturated?
	 *
	 * Using semaphores incurs a cost in having the GPU poll a
	 * memory location, busywaiting for it to change. The continual
	 * memory reads can have a noticeable impact on the rest of the
	 * system with the extra bus traffic, stalling the cpu as it too
	 * tries to access memory across the bus (perf stat -e bus-cycles).
	 *
	 * If we installed a semaphore on this request and we only submit
	 * the request after the signaler completed, that indicates the
	 * system is overloaded and using semaphores at this time only
	 * increases the amount of work we are doing. If so, we disable
	 * further use of semaphores until we are idle again, whence we
	 * optimistically try again.
	 */
	if (request->sched.semaphores &&
	    i915_sw_fence_signaled(&request->semaphore))
		engine->saturated |= request->sched.semaphores;

	engine->emit_fini_breadcrumb(request,
				     request->ring->vaddr + request->postfix);

	trace_i915_request_execute(request);
	engine->serial++;
	result = true;

xfer:	/* We may be recursing from the signal callback of another i915 fence */
	spin_lock_nested(&request->lock, SINGLE_DEPTH_NESTING);

	if (!test_and_set_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags))
		list_move_tail(&request->sched.link, &engine->active.requests);

	if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &request->fence.flags) &&
	    !test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &request->fence.flags) &&
	    !i915_request_enable_breadcrumb(request))
		intel_engine_queue_breadcrumbs(engine);

	__notify_execute_cb(request);

	spin_unlock(&request->lock);

	return result;
}

void i915_request_submit(struct i915_request *request)
{
	struct intel_engine_cs *engine = request->engine;
	unsigned long flags;

	/* Will be called from irq-context when using foreign fences. */
	spin_lock_irqsave(&engine->active.lock, flags);

	__i915_request_submit(request);

	spin_unlock_irqrestore(&engine->active.lock, flags);
}
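/*
 * Undo __i915_request_submit(): cancel the breadcrumb and clear the ACTIVE
 * flag so that the request can later be resubmitted. Requests are only
 * unwound in reverse order, keeping the per-context list in seqno/ring order.
 */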
void __i915_request_unsubmit(struct i915_request *request)
{
	struct intel_engine_cs *engine = request->engine;

	GEM_TRACE("%s fence %llx:%lld, current %d\n",
		  engine->name,
		  request->fence.context, request->fence.seqno,
		  hwsp_seqno(request));

	GEM_BUG_ON(!irqs_disabled());
	lockdep_assert_held(&engine->active.lock);

	/*
	 * Only unwind in reverse order, required so that the per-context list
	 * is kept in seqno/ring order.
	 */

	/* We may be recursing from the signal callback of another i915 fence */
	spin_lock_nested(&request->lock, SINGLE_DEPTH_NESTING);

	if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &request->fence.flags))
		i915_request_cancel_breadcrumb(request);

	GEM_BUG_ON(!test_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags));
	clear_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags);

	spin_unlock(&request->lock);

	/* We've already spun, don't charge on resubmitting. */
	if (request->sched.semaphores && i915_request_started(request)) {
		request->sched.attr.priority |= I915_PRIORITY_NOSEMAPHORE;
		request->sched.semaphores = 0;
	}

	/*
	 * We don't need to wake_up any waiters on request->execute, they
	 * will get woken by any other event or us re-adding this request
	 * to the engine timeline (__i915_request_submit()). The waiters
	 * should be quite adept at finding that the request now has a new
	 * global_seqno compared to the one they went to sleep on.
	 */
}

void i915_request_unsubmit(struct i915_request *request)
{
	struct intel_engine_cs *engine = request->engine;
	unsigned long flags;

	/* Will be called from irq-context when using foreign fences. */
	spin_lock_irqsave(&engine->active.lock, flags);

	__i915_request_unsubmit(request);

	spin_unlock_irqrestore(&engine->active.lock, flags);
}
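/*
 * i915_sw_fence notify callbacks: once all of a request's submit dependencies
 * have been resolved, submit_notify() pushes it to the engine backend;
 * semaphore_notify() bumps the request's priority with
 * I915_PRIORITY_NOSEMAPHORE once its semaphore wait is resolved.
 */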
static int __i915_sw_fence_call
submit_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state)
{
	struct i915_request *request =
		container_of(fence, typeof(*request), submit);

	switch (state) {
	case FENCE_COMPLETE:
		trace_i915_request_submit(request);

		if (unlikely(fence->error))
			i915_request_skip(request, fence->error);

		/*
		 * We need to serialize use of the submit_request() callback
		 * with its hotplugging performed during an emergency
		 * i915_gem_set_wedged(). We use the RCU mechanism to mark the
		 * critical section in order to force i915_gem_set_wedged() to
		 * wait until the submit_request() is completed before
		 * proceeding.
		 */
		rcu_read_lock();
		request->engine->submit_request(request);
		rcu_read_unlock();
		break;

	case FENCE_FREE:
		i915_request_put(request);
		break;
	}

	return NOTIFY_DONE;
}

static int __i915_sw_fence_call
semaphore_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state)
{
	struct i915_request *request =
		container_of(fence, typeof(*request), semaphore);

	switch (state) {
	case FENCE_COMPLETE:
		i915_schedule_bump_priority(request, I915_PRIORITY_NOSEMAPHORE);
		break;

	case FENCE_FREE:
		i915_request_put(request);
		break;
	}

	return NOTIFY_DONE;
}

static void retire_requests(struct intel_timeline *tl)
{
	struct i915_request *rq, *rn;

	list_for_each_entry_safe(rq, rn, &tl->requests, link)
		if (!i915_request_retire(rq))
			break;
}
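/*
 * Slow path for request allocation: try to reclaim memory by retiring old
 * requests on the timeline (rate-limited by an RCU grace period) before
 * falling back to a blocking slab allocation.
 */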
static noinline struct i915_request *
request_alloc_slow(struct intel_timeline *tl, gfp_t gfp)
{
	struct i915_request *rq;

	if (list_empty(&tl->requests))
		goto out;

	if (!gfpflags_allow_blocking(gfp))
		goto out;

	/* Move our oldest request to the slab-cache (if not in use!) */
	rq = list_first_entry(&tl->requests, typeof(*rq), link);
	i915_request_retire(rq);

	rq = kmem_cache_alloc(global.slab_requests,
			      gfp | __GFP_RETRY_MAYFAIL | __GFP_NOWARN);
	if (rq)
		return rq;

	/* Ratelimit ourselves to prevent oom from malicious clients */
	rq = list_last_entry(&tl->requests, typeof(*rq), link);
	cond_synchronize_rcu(rq->rcustate);

	/* Retire our old requests in the hope that we free some */
	retire_requests(tl);

out:
	return kmem_cache_alloc(global.slab_requests, gfp);
}

struct i915_request *
__i915_request_create(struct intel_context *ce, gfp_t gfp)
{
	struct intel_timeline *tl = ce->timeline;
	struct i915_request *rq;
	u32 seqno;
	int ret;

	might_sleep_if(gfpflags_allow_blocking(gfp));

	/* Check that the caller provided an already pinned context */
	__intel_context_pin(ce);

	/*
	 * Beware: Dragons be flying overhead.
	 *
	 * We use RCU to look up requests in flight. The lookups may
	 * race with the request being allocated from the slab freelist.
	 * That is, the request we are writing to here may be in the process
	 * of being read by __i915_active_request_get_rcu(). As such,
	 * we have to be very careful when overwriting the contents. During
	 * the RCU lookup, we chase the request->engine pointer,
	 * read the request->global_seqno and increment the reference count.
	 *
	 * The reference count is incremented atomically. If it is zero,
	 * the lookup knows the request is unallocated and complete. Otherwise,
	 * it is either still in use, or has been reallocated and reset
	 * with dma_fence_init(). This increment is safe for release as we
	 * check that the request we have a reference to matches the active
	 * request.
	 *
	 * Before we increment the refcount, we chase the request->engine
	 * pointer. We must not call kmem_cache_zalloc() or else we set
	 * that pointer to NULL and cause a crash during the lookup. If
	 * we see the request is completed (based on the value of the
	 * old engine and seqno), the lookup is complete and reports NULL.
	 * If we decide the request is not completed (new engine or seqno),
	 * then we grab a reference and double check that it is still the
	 * active request - which it won't be, and restart the lookup.
	 *
	 * Do not use kmem_cache_zalloc() here!
	 */
	rq = kmem_cache_alloc(global.slab_requests,
			      gfp | __GFP_RETRY_MAYFAIL | __GFP_NOWARN);
	if (unlikely(!rq)) {
		rq = request_alloc_slow(tl, gfp);
		if (!rq) {
			ret = -ENOMEM;
			goto err_unreserve;
		}
	}

	ret = intel_timeline_get_seqno(tl, rq, &seqno);
	if (ret)
		goto err_free;

	rq->i915 = ce->engine->i915;
	rq->hw_context = ce;
	rq->gem_context = ce->gem_context;
	rq->engine = ce->engine;
	rq->ring = ce->ring;
	rq->execution_mask = ce->engine->mask;

	rcu_assign_pointer(rq->timeline, tl);
	rq->hwsp_seqno = tl->hwsp_seqno;
	rq->hwsp_cacheline = tl->hwsp_cacheline;

	rq->rcustate = get_state_synchronize_rcu(); /* acts as smp_mb() */

	spin_lock_init(&rq->lock);
	dma_fence_init(&rq->fence, &i915_fence_ops, &rq->lock,
		       tl->fence_context, seqno);

	/* We bump the ref for the fence chain */
	i915_sw_fence_init(&i915_request_get(rq)->submit, submit_notify);
	i915_sw_fence_init(&i915_request_get(rq)->semaphore, semaphore_notify);

	i915_sched_node_init(&rq->sched);

	/* No zalloc, must clear what we need by hand */
	rq->file_priv = NULL;
	rq->batch = NULL;
	rq->capture_list = NULL;
	rq->flags = 0;

	INIT_LIST_HEAD(&rq->execute_cb);

	/*
	 * Reserve space in the ring buffer for all the commands required to
	 * eventually emit this request. This is to guarantee that the
	 * i915_request_add() call can't fail. Note that the reserve may need
	 * to be redone if the request is not actually submitted straight
	 * away, e.g. because a GPU scheduler has deferred it.
	 *
	 * Note that due to how we add reserved_space to intel_ring_begin()
	 * we need to double our request to ensure that if we need to wrap
	 * around inside i915_request_add() there is sufficient space at
	 * the beginning of the ring as well.
	 */
	rq->reserved_space =
		2 * rq->engine->emit_fini_breadcrumb_dw * sizeof(u32);

	/*
	 * Record the position of the start of the request so that
	 * should we detect the updated seqno part-way through the
	 * GPU processing the request, we never over-estimate the
	 * position of the head.
	 */
	rq->head = rq->ring->emit;

	ret = rq->engine->request_alloc(rq);
	if (ret)
		goto err_unwind;

	rq->infix = rq->ring->emit; /* end of header; start of user payload */

	intel_context_mark_active(ce);
	return rq;

err_unwind:
	ce->ring->emit = rq->head;

	/* Make sure we didn't add ourselves to external state before freeing */
	GEM_BUG_ON(!list_empty(&rq->sched.signalers_list));
	GEM_BUG_ON(!list_empty(&rq->sched.waiters_list));

err_free:
	kmem_cache_free(global.slab_requests, rq);
err_unreserve:
	intel_context_unpin(ce);
	return ERR_PTR(ret);
}

struct i915_request *
i915_request_create(struct intel_context *ce)
{
	struct i915_request *rq;
	struct intel_timeline *tl;

	tl = intel_context_timeline_lock(ce);
	if (IS_ERR(tl))
		return ERR_CAST(tl);

	/* Move our oldest request to the slab-cache (if not in use!) */
	rq = list_first_entry(&tl->requests, typeof(*rq), link);
	if (!list_is_last(&rq->link, &tl->requests))
		i915_request_retire(rq);

	intel_context_enter(ce);
	rq = __i915_request_create(ce, GFP_KERNEL);
	intel_context_exit(ce); /* active reference transferred to request */
	if (IS_ERR(rq))
		goto err_unlock;

	/* Check that we do not interrupt ourselves with a new request */
	rq->cookie = lockdep_pin_lock(&tl->mutex);

	return rq;

err_unlock:
	intel_context_timeline_unlock(tl);
	return rq;
}
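/*
 * Order rq behind the start of signal by waiting on the request that precedes
 * signal on its timeline; if signal has already started (or its timeline is
 * gone), there is nothing to wait for.
 */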
static int
i915_request_await_start(struct i915_request *rq, struct i915_request *signal)
{
	struct intel_timeline *tl;
	struct dma_fence *fence;
	int err;

	GEM_BUG_ON(i915_request_timeline(rq) ==
		   rcu_access_pointer(signal->timeline));

	rcu_read_lock();
	tl = rcu_dereference(signal->timeline);
	if (i915_request_started(signal) || !kref_get_unless_zero(&tl->kref))
		tl = NULL;
	rcu_read_unlock();
	if (!tl) /* already started or maybe even completed */
		return 0;

	fence = ERR_PTR(-EBUSY);
	if (mutex_trylock(&tl->mutex)) {
		fence = NULL;
		if (!i915_request_started(signal) &&
		    !list_is_first(&signal->link, &tl->requests)) {
			signal = list_prev_entry(signal, link);
			fence = dma_fence_get(&signal->fence);
		}
		mutex_unlock(&tl->mutex);
	}
	intel_timeline_put(tl);
	if (IS_ERR_OR_NULL(fence))
		return PTR_ERR_OR_ZERO(fence);

	err = 0;
	if (intel_timeline_sync_is_later(i915_request_timeline(rq), fence))
		err = i915_sw_fence_await_dma_fence(&rq->submit,
						    fence, 0,
						    I915_FENCE_GFP);
	dma_fence_put(fence);

	return err;
}

static intel_engine_mask_t
already_busywaiting(struct i915_request *rq)
{
	/*
	 * Polling a semaphore causes bus traffic, delaying other users of
	 * both the GPU and CPU. We want to limit the impact on others,
	 * while taking advantage of early submission to reduce GPU
	 * latency. Therefore we restrict ourselves to not using more
	 * than one semaphore from each source, and not using a semaphore
	 * if we have detected the engine is saturated (i.e. would not be
	 * submitted early and cause bus traffic reading an already passed
	 * semaphore).
	 *
	 * See the are-we-too-late? check in __i915_request_submit().
	 */
	return rq->sched.semaphores | rq->engine->saturated;
}
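/*
 * Emit a MI_SEMAPHORE_WAIT poll on the signaler's HWSP seqno so that the GPU
 * busywaits for the signaler instead of stalling submission on the CPU. Falls
 * back to an ordinary submit-fence wait if a semaphore is not worthwhile.
 */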
static int
emit_semaphore_wait(struct i915_request *to,
		    struct i915_request *from,
		    gfp_t gfp)
{
	const int has_token = INTEL_GEN(to->i915) >= 12;
	u32 hwsp_offset;
	int len;
	u32 *cs;

	GEM_BUG_ON(INTEL_GEN(to->i915) < 8);

	/* Just emit the first semaphore we see as request space is limited. */
	if (already_busywaiting(to) & from->engine->mask)
		goto await_fence;

	if (i915_request_await_start(to, from) < 0)
		goto await_fence;

	/* Only submit our spinner after the signaler is running! */
	if (__i915_request_await_execution(to, from, NULL, gfp))
		goto await_fence;

	/* We need to pin the signaler's HWSP until we are finished reading. */
	if (intel_timeline_read_hwsp(from, to, &hwsp_offset))
		goto await_fence;

	len = 4;
	if (has_token)
		len += 2;

	cs = intel_ring_begin(to, len);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	/*
	 * Using greater-than-or-equal here means we have to worry
	 * about seqno wraparound. To side step that issue, we swap
	 * the timeline HWSP upon wrapping, so that everyone listening
	 * for the old (pre-wrap) values does not see the much smaller
	 * (post-wrap) values than they were expecting (and so wait
	 * forever).
	 */
	*cs++ = (MI_SEMAPHORE_WAIT |
		 MI_SEMAPHORE_GLOBAL_GTT |
		 MI_SEMAPHORE_POLL |
		 MI_SEMAPHORE_SAD_GTE_SDD) +
		has_token;
	*cs++ = from->fence.seqno;
	*cs++ = hwsp_offset;
	*cs++ = 0;
	if (has_token) {
		*cs++ = 0;
		*cs++ = MI_NOOP;
	}

	intel_ring_advance(to, cs);
	to->sched.semaphores |= from->engine->mask;
	to->sched.flags |= I915_SCHED_HAS_SEMAPHORE_CHAIN;
	return 0;

await_fence:
	return i915_sw_fence_await_dma_fence(&to->submit,
					     &from->fence, 0,
					     I915_FENCE_GFP);
}
static int
i915_request_await_request(struct i915_request *to, struct i915_request *from)
{
	int ret;

	GEM_BUG_ON(to == from);
	GEM_BUG_ON(to->timeline == from->timeline);

	if (i915_request_completed(from))
		return 0;

	if (to->engine->schedule) {
		ret = i915_sched_node_add_dependency(&to->sched, &from->sched);
		if (ret < 0)
			return ret;
	}

	if (to->engine == from->engine) {
		ret = i915_sw_fence_await_sw_fence_gfp(&to->submit,
						       &from->submit,
						       I915_FENCE_GFP);
	} else if (intel_engine_has_semaphores(to->engine) &&
		   to->gem_context->sched.priority >= I915_PRIORITY_NORMAL) {
		ret = emit_semaphore_wait(to, from, I915_FENCE_GFP);
	} else {
		ret = i915_sw_fence_await_dma_fence(&to->submit,
						    &from->fence, 0,
						    I915_FENCE_GFP);
	}
	if (ret < 0)
		return ret;

	if (to->sched.flags & I915_SCHED_HAS_SEMAPHORE_CHAIN) {
		ret = i915_sw_fence_await_dma_fence(&to->semaphore,
						    &from->fence, 0,
						    I915_FENCE_GFP);
		if (ret < 0)
			return ret;
	}

	return 0;
}

int
i915_request_await_dma_fence(struct i915_request *rq, struct dma_fence *fence)
{
	struct dma_fence **child = &fence;
	unsigned int nchild = 1;
	int ret;

	/*
	 * Note that if the fence-array was created in signal-on-any mode,
	 * we should *not* decompose it into its individual fences. However,
	 * we don't currently store which mode the fence-array is operating
	 * in. Fortunately, the only user of signal-on-any is private to
	 * amdgpu and we should not see any incoming fence-array from
	 * sync-file being in signal-on-any mode.
	 */
	if (dma_fence_is_array(fence)) {
		struct dma_fence_array *array = to_dma_fence_array(fence);

		child = array->fences;
		nchild = array->num_fences;
		GEM_BUG_ON(!nchild);
	}

	do {
		fence = *child++;
		if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags))
			continue;

		/*
		 * Requests on the same timeline are explicitly ordered, along
		 * with their dependencies, by i915_request_add() which ensures
		 * that requests are submitted in-order through each ring.
		 */
		if (fence->context == rq->fence.context)
			continue;

		/* Squash repeated waits to the same timelines */
		if (fence->context &&
		    intel_timeline_sync_is_later(i915_request_timeline(rq),
						 fence))
			continue;

		if (dma_fence_is_i915(fence))
			ret = i915_request_await_request(rq, to_request(fence));
		else
			ret = i915_sw_fence_await_dma_fence(&rq->submit, fence,
							    fence->context ? I915_FENCE_TIMEOUT : 0,
							    I915_FENCE_GFP);
		if (ret < 0)
			return ret;

		/* Record the latest fence used against each timeline */
		if (fence->context)
			intel_timeline_sync_set(i915_request_timeline(rq),
						fence);
	} while (--nchild);

	return 0;
}
int
i915_request_await_execution(struct i915_request *rq,
			     struct dma_fence *fence,
			     void (*hook)(struct i915_request *rq,
					  struct dma_fence *signal))
{
	struct dma_fence **child = &fence;
	unsigned int nchild = 1;
	int ret;

	if (dma_fence_is_array(fence)) {
		struct dma_fence_array *array = to_dma_fence_array(fence);

		/* XXX Error for signal-on-any fence arrays */

		child = array->fences;
		nchild = array->num_fences;
		GEM_BUG_ON(!nchild);
	}

	do {
		fence = *child++;
		if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags))
			continue;

		/*
		 * We don't squash repeated fence dependencies here as we
		 * want to run our callback in all cases.
		 */

		if (dma_fence_is_i915(fence))
			ret = __i915_request_await_execution(rq,
							     to_request(fence),
							     hook,
							     I915_FENCE_GFP);
		else
			ret = i915_sw_fence_await_dma_fence(&rq->submit, fence,
							    I915_FENCE_TIMEOUT,
							    GFP_KERNEL);
		if (ret < 0)
			return ret;
	} while (--nchild);

	return 0;
}

/**
 * i915_request_await_object - set this request to (async) wait upon a bo
 * @to: request we are wishing to use
 * @obj: object which may be in use on another ring.
 * @write: whether the wait is on behalf of a writer
 *
 * This code is meant to abstract object synchronization with the GPU.
 * Conceptually we serialise writes between engines inside the GPU.
 * We only allow one engine to write into a buffer at any time, but
 * multiple readers. To ensure each has a coherent view of memory, we must:
 *
 * - If there is an outstanding write request to the object, the new
 *   request must wait for it to complete (either CPU or in hw, requests
 *   on the same ring will be naturally ordered).
 *
 * - If we are a write request (pending_write_domain is set), the new
 *   request must wait for outstanding read requests to complete.
 *
 * Returns 0 if successful, else propagates up the lower layer error.
 */
int
i915_request_await_object(struct i915_request *to,
			  struct drm_i915_gem_object *obj,
			  bool write)
{
	struct dma_fence *excl;
	int ret = 0;

	if (write) {
		struct dma_fence **shared;
		unsigned int count, i;

		ret = dma_resv_get_fences_rcu(obj->base.resv,
					      &excl, &count, &shared);
		if (ret)
			return ret;

		for (i = 0; i < count; i++) {
			ret = i915_request_await_dma_fence(to, shared[i]);
			if (ret)
				break;

			dma_fence_put(shared[i]);
		}

		for (; i < count; i++)
			dma_fence_put(shared[i]);
		kfree(shared);
	} else {
		excl = dma_resv_get_excl_rcu(obj->base.resv);
	}

	if (excl) {
		if (ret == 0)
			ret = i915_request_await_dma_fence(to, excl);

		dma_fence_put(excl);
	}

	return ret;
}
void i915_request_skip(struct i915_request *rq, int error)
{
	void *vaddr = rq->ring->vaddr;
	u32 head;

	GEM_BUG_ON(!IS_ERR_VALUE((long)error));
	dma_fence_set_error(&rq->fence, error);

	if (rq->infix == rq->postfix)
		return;

	/*
	 * As this request likely depends on state from the lost
	 * context, clear out all the user operations leaving the
	 * breadcrumb at the end (so we get the fence notifications).
	 */
	head = rq->infix;
	if (rq->postfix < head) {
		memset(vaddr + head, 0, rq->ring->size - head);
		head = 0;
	}
	memset(vaddr + head, 0, rq->postfix - head);
	rq->infix = rq->postfix;
}

static struct i915_request *
__i915_request_add_to_timeline(struct i915_request *rq)
{
	struct intel_timeline *timeline = i915_request_timeline(rq);
	struct i915_request *prev;

	/*
	 * Dependency tracking and request ordering along the timeline
	 * is special cased so that we can eliminate redundant ordering
	 * operations while building the request (we know that the timeline
	 * itself is ordered, and here we guarantee it).
	 *
	 * As we know we will need to emit tracking along the timeline,
	 * we embed the hooks into our request struct -- at the cost of
	 * having to have specialised no-allocation interfaces (which will
	 * be beneficial elsewhere).
	 *
	 * A second benefit to open-coding i915_request_await_request is
	 * that we can apply a slight variant of the rules specialised
	 * for timelines that jump between engines (such as virtual engines).
	 * If we consider the case of virtual engine, we must emit a dma-fence
	 * to prevent scheduling of the second request until the first is
	 * complete (to maximise our greedy late load balancing) and this
	 * precludes optimising to use semaphore serialisation of a single
	 * timeline across engines.
	 */
	prev = to_request(__i915_active_fence_set(&timeline->last_request,
						  &rq->fence));
	if (prev && !i915_request_completed(prev)) {
		if (is_power_of_2(prev->engine->mask | rq->engine->mask))
			i915_sw_fence_await_sw_fence(&rq->submit,
						     &prev->submit,
						     &rq->submitq);
		else
			__i915_sw_fence_await_dma_fence(&rq->submit,
							&prev->fence,
							&rq->dmaq);
		if (rq->engine->schedule)
			__i915_sched_node_add_dependency(&rq->sched,
							 &prev->sched,
							 &rq->dep,
							 0);
	}

	list_add_tail(&rq->link, &timeline->requests);

	/*
	 * Make sure that no request gazumped us - if it was allocated after
	 * our i915_request_alloc() and called __i915_request_add() before
	 * us, the timeline will hold its seqno which is later than ours.
	 */
	GEM_BUG_ON(timeline->seqno != rq->fence.seqno);

	return prev;
}
/*
 * NB: This function is not allowed to fail. Doing so would mean the
 * request is not being tracked for completion but the work itself is
 * going to happen on the hardware. This would be a Bad Thing(tm).
 */
struct i915_request *__i915_request_commit(struct i915_request *rq)
{
	struct intel_engine_cs *engine = rq->engine;
	struct intel_ring *ring = rq->ring;
	u32 *cs;

	GEM_TRACE("%s fence %llx:%lld\n",
		  engine->name, rq->fence.context, rq->fence.seqno);

	/*
	 * To ensure that this call will not fail, space for its emissions
	 * should already have been reserved in the ring buffer. Let the ring
	 * know that it is time to use that space up.
	 */
	GEM_BUG_ON(rq->reserved_space > ring->space);
	rq->reserved_space = 0;
	rq->emitted_jiffies = jiffies;

	/*
	 * Record the position of the start of the breadcrumb so that
	 * should we detect the updated seqno part-way through the
	 * GPU processing the request, we never over-estimate the
	 * position of the ring's HEAD.
	 */
	cs = intel_ring_begin(rq, engine->emit_fini_breadcrumb_dw);
	GEM_BUG_ON(IS_ERR(cs));
	rq->postfix = intel_ring_offset(rq, cs);

	return __i915_request_add_to_timeline(rq);
}

void __i915_request_queue(struct i915_request *rq,
			  const struct i915_sched_attr *attr)
{
	/*
	 * Let the backend know a new request has arrived that may need
	 * to adjust the existing execution schedule due to a high priority
	 * request - i.e. we may want to preempt the current request in order
	 * to run a high priority dependency chain *before* we can execute this
	 * request.
	 *
	 * This is called before the request is ready to run so that we can
	 * decide whether to preempt the entire chain so that it is ready to
	 * run at the earliest possible convenience.
	 */
	i915_sw_fence_commit(&rq->semaphore);
	if (attr && rq->engine->schedule)
		rq->engine->schedule(rq, attr);
	i915_sw_fence_commit(&rq->submit);
}
void i915_request_add(struct i915_request *rq)
{
	struct i915_sched_attr attr = rq->gem_context->sched;
	struct intel_timeline * const tl = i915_request_timeline(rq);
	struct i915_request *prev;

	lockdep_assert_held(&tl->mutex);
	lockdep_unpin_lock(&tl->mutex, rq->cookie);

	trace_i915_request_add(rq);

	prev = __i915_request_commit(rq);

	/*
	 * Boost actual workloads past semaphores!
	 *
	 * With semaphores we spin on one engine waiting for another,
	 * simply to reduce the latency of starting our work when
	 * the signaler completes. However, if there is any other
	 * work that we could be doing on this engine instead, that
	 * is better utilisation and will reduce the overall duration
	 * of the current work. To avoid PI boosting a semaphore
	 * far in the distance past over useful work, we keep a history
	 * of any semaphore use along our dependency chain.
	 */
	if (!(rq->sched.flags & I915_SCHED_HAS_SEMAPHORE_CHAIN))
		attr.priority |= I915_PRIORITY_NOSEMAPHORE;

	/*
	 * Boost priorities to new clients (new request flows).
	 *
	 * Allow interactive/synchronous clients to jump ahead of
	 * the bulk clients. (FQ_CODEL)
	 */
	if (list_empty(&rq->sched.signalers_list))
		attr.priority |= I915_PRIORITY_WAIT;

	local_bh_disable();
	__i915_request_queue(rq, &attr);
	local_bh_enable(); /* Kick the execlists tasklet if just scheduled */

	/*
	 * In typical scenarios, we do not expect the previous request on
	 * the timeline to be still tracked by timeline->last_request if it
	 * has been completed. If the completed request is still here, that
	 * implies that request retirement is a long way behind submission,
	 * suggesting that we haven't been retiring frequently enough from
	 * the combination of retire-before-alloc, waiters and the background
	 * retirement worker. So if the last request on this timeline was
	 * already completed, do a catch up pass, flushing the retirement queue
	 * up to this client. Since we have now moved the heaviest operations
	 * during retirement onto secondary workers, such as freeing objects
	 * or contexts, retiring a bunch of requests is mostly list management
	 * (and cache misses), and so we should not be overly penalizing this
	 * client by performing excess work, though we may still be performing
	 * work on behalf of others -- but instead we should benefit from
	 * improved resource management. (Well, that's the theory at least.)
	 */
	if (prev &&
	    i915_request_completed(prev) &&
	    rcu_access_pointer(prev->timeline) == tl)
		i915_request_retire_upto(prev);

	mutex_unlock(&tl->mutex);
}
static unsigned long local_clock_us(unsigned int *cpu)
{
	unsigned long t;

	/*
	 * Cheaply and approximately convert from nanoseconds to microseconds.
	 * The result and subsequent calculations are also defined in the same
	 * approximate microseconds units. The principal source of timing
	 * error here is from the simple truncation.
	 *
	 * Note that local_clock() is only defined wrt the current CPU;
	 * the comparisons are no longer valid if we switch CPUs. Instead of
	 * blocking preemption for the entire busywait, we can detect the CPU
	 * switch and use that as indicator of system load and a reason to
	 * stop busywaiting, see busywait_stop().
	 */
	*cpu = get_cpu();
	t = local_clock() >> 10;
	put_cpu();

	return t;
}

static bool busywait_stop(unsigned long timeout, unsigned int cpu)
{
	unsigned int this_cpu;

	if (time_after(local_clock_us(&this_cpu), timeout))
		return true;

	return this_cpu != cpu;
}
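/*
 * Busywait for up to timeout_us for the request to complete before we fall
 * back to an interrupt-driven wait; only worth doing if the request has
 * already been submitted to HW and so may complete imminently.
 */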
static bool __i915_spin_request(const struct i915_request * const rq,
				int state, unsigned long timeout_us)
{
	unsigned int cpu;

	/*
	 * Only wait for the request if we know it is likely to complete.
	 *
	 * We don't track the timestamps around requests, nor the average
	 * request length, so we do not have a good indicator that this
	 * request will complete within the timeout. What we do know is the
	 * order in which requests are executed by the context and so we can
	 * tell if the request has been started. If the request is not even
	 * running yet, it is a fair assumption that it will not complete
	 * within our relatively short timeout.
	 */
	if (!i915_request_is_running(rq))
		return false;

	/*
	 * When waiting for high frequency requests, e.g. during synchronous
	 * rendering split between the CPU and GPU, the finite amount of time
	 * required to set up the irq and wait upon it limits the response
	 * rate. By busywaiting on the request completion for a short while we
	 * can service the high frequency waits as quickly as possible. However,
	 * if it is a slow request, we want to sleep as quickly as possible.
	 * The tradeoff between waiting and sleeping is roughly the time it
	 * takes to sleep on a request, on the order of a microsecond.
	 */

	timeout_us += local_clock_us(&cpu);
	do {
		if (i915_request_completed(rq))
			return true;

		if (signal_pending_state(state, current))
			break;

		if (busywait_stop(timeout_us, cpu))
			break;

		cpu_relax();
	} while (!need_resched());

	return false;
}
CW
1373struct request_wait {
1374 struct dma_fence_cb cb;
1375 struct task_struct *tsk;
1376};
1377
1378static void request_wait_wake(struct dma_fence *fence, struct dma_fence_cb *cb)
1379{
1380 struct request_wait *wait = container_of(cb, typeof(*wait), cb);
1381
1382 wake_up_process(wait->tsk);
1383}
1384
/**
 * i915_request_wait - wait until execution of request has finished
 * @rq: the request to wait upon
 * @flags: how to wait
 * @timeout: how long to wait in jiffies
 *
 * i915_request_wait() waits for the request to be completed, for a
 * maximum of @timeout jiffies (with MAX_SCHEDULE_TIMEOUT implying an
 * unbounded wait).
 *
 * Returns the remaining time (in jiffies) if the request completed, which may
 * be zero or -ETIME if the request is unfinished after the timeout expires.
 * May return -EINTR if called with I915_WAIT_INTERRUPTIBLE and a signal is
 * pending before the request completes.
 */
long i915_request_wait(struct i915_request *rq,
		       unsigned int flags,
		       long timeout)
{
	const int state = flags & I915_WAIT_INTERRUPTIBLE ?
		TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE;
	struct request_wait wait;

	might_sleep();
	GEM_BUG_ON(timeout < 0);

	if (dma_fence_is_signaled(&rq->fence))
		return timeout;

	if (!timeout)
		return -ETIME;

	trace_i915_request_wait_begin(rq, flags);

	/*
	 * We must never wait on the GPU while holding a lock as we
	 * may need to perform a GPU reset. So while we don't need to
	 * serialise wait/reset with an explicit lock, we do want
	 * lockdep to detect potential dependency cycles.
	 */
	mutex_acquire(&rq->engine->gt->reset.mutex.dep_map, 0, 0, _THIS_IP_);

	/*
	 * Optimistic spin before touching IRQs.
	 *
	 * We may use a rather large value here to offset the penalty of
	 * switching away from the active task. Frequently, the client will
	 * wait upon an old swapbuffer to throttle itself to remain within a
	 * frame of the gpu. If the client is running in lockstep with the gpu,
	 * then it should not be waiting long at all, and a sleep now will incur
	 * extra scheduler latency in producing the next frame. To try to
	 * avoid adding the cost of enabling/disabling the interrupt to the
	 * short wait, we first spin to see if the request would have completed
	 * in the time taken to setup the interrupt.
	 *
	 * We need up to 5us to enable the irq, and up to 20us to hide the
	 * scheduler latency of a context switch, ignoring the secondary
	 * impacts from a context switch such as cache eviction.
	 *
	 * The scheme used for low-latency IO is called "hybrid interrupt
	 * polling". The suggestion there is to sleep until just before you
	 * expect to be woken by the device interrupt and then poll for its
	 * completion. That requires having a good predictor for the request
	 * duration, which we currently lack.
	 */
	if (CONFIG_DRM_I915_SPIN_REQUEST &&
	    __i915_spin_request(rq, state, CONFIG_DRM_I915_SPIN_REQUEST)) {
		dma_fence_signal(&rq->fence);
		goto out;
	}

	/*
	 * This client is about to stall waiting for the GPU. In many cases
	 * this is undesirable and limits the throughput of the system, as
	 * many clients cannot continue processing user input/output whilst
	 * blocked. RPS autotuning may take tens of milliseconds to respond
	 * to the GPU load and thus incurs additional latency for the client.
	 * We can circumvent that by promoting the GPU frequency to maximum
	 * before we sleep. This makes the GPU throttle up much more quickly
	 * (good for benchmarks and user experience, e.g. window animations),
	 * but at a cost of spending more power processing the workload
	 * (bad for battery).
	 */
	if (flags & I915_WAIT_PRIORITY) {
		if (!i915_request_started(rq) && INTEL_GEN(rq->i915) >= 6)
			gen6_rps_boost(rq);
		i915_schedule_bump_priority(rq, I915_PRIORITY_WAIT);
	}

	wait.tsk = current;
	if (dma_fence_add_callback(&rq->fence, &wait.cb, request_wait_wake))
		goto out;

	for (;;) {
		set_current_state(state);

		if (i915_request_completed(rq)) {
			dma_fence_signal(&rq->fence);
			break;
		}

		if (signal_pending_state(state, current)) {
			timeout = -ERESTARTSYS;
			break;
		}

		if (!timeout) {
			timeout = -ETIME;
			break;
		}

		intel_engine_flush_submission(rq->engine);
		timeout = io_schedule_timeout(timeout);
	}
	__set_current_state(TASK_RUNNING);

	dma_fence_remove_callback(&rq->fence, &wait.cb);

out:
	mutex_release(&rq->engine->gt->reset.mutex.dep_map, 0, _THIS_IP_);
	trace_i915_request_wait_end(rq);
	return timeout;
}
#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftests/mock_request.c"
#include "selftests/i915_request.c"
#endif

static void i915_global_request_shrink(void)
{
	kmem_cache_shrink(global.slab_dependencies);
	kmem_cache_shrink(global.slab_execute_cbs);
	kmem_cache_shrink(global.slab_requests);
}

static void i915_global_request_exit(void)
{
	kmem_cache_destroy(global.slab_dependencies);
	kmem_cache_destroy(global.slab_execute_cbs);
	kmem_cache_destroy(global.slab_requests);
}

static struct i915_global_request global = { {
	.shrink = i915_global_request_shrink,
	.exit = i915_global_request_exit,
} };

int __init i915_global_request_init(void)
{
	global.slab_requests = KMEM_CACHE(i915_request,
					  SLAB_HWCACHE_ALIGN |
					  SLAB_RECLAIM_ACCOUNT |
					  SLAB_TYPESAFE_BY_RCU);
	if (!global.slab_requests)
		return -ENOMEM;

	global.slab_execute_cbs = KMEM_CACHE(execute_cb,
					     SLAB_HWCACHE_ALIGN |
					     SLAB_RECLAIM_ACCOUNT |
					     SLAB_TYPESAFE_BY_RCU);
	if (!global.slab_execute_cbs)
		goto err_requests;

	global.slab_dependencies = KMEM_CACHE(i915_dependency,
					      SLAB_HWCACHE_ALIGN |
					      SLAB_RECLAIM_ACCOUNT);
	if (!global.slab_dependencies)
		goto err_execute_cbs;

	i915_global_register(&global.base);
	return 0;

err_execute_cbs:
	kmem_cache_destroy(global.slab_execute_cbs);
err_requests:
	kmem_cache_destroy(global.slab_requests);
	return -ENOMEM;
}