/* drivers/gpu/drm/i915/i915_request.c (linux-2.6-block.git) */
/*
 * Copyright © 2008-2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 */

#include <linux/dma-fence-array.h>
#include <linux/irq_work.h>
#include <linux/prefetch.h>
#include <linux/sched.h>
#include <linux/sched/clock.h>
#include <linux/sched/signal.h>

#include "gem/i915_gem_context.h"
#include "gt/intel_context.h"

#include "i915_active.h"
#include "i915_drv.h"
#include "i915_globals.h"
#include "intel_pm.h"

struct execute_cb {
	struct list_head link;
	struct irq_work work;
	struct i915_sw_fence *fence;
	void (*hook)(struct i915_request *rq, struct dma_fence *signal);
	struct i915_request *signal;
};
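
/*
 * An execute_cb (above) is a callback hung off rq->execute_cb: it is fired
 * as irq_work by __notify_execute_cb() once the request is marked active
 * (i.e. actually submitted to the backend), which lets
 * __i915_request_await_execution() and the semaphore emission code react to
 * execution rather than to completion.
 */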

static struct i915_global_request {
	struct i915_global base;
	struct kmem_cache *slab_requests;
	struct kmem_cache *slab_dependencies;
	struct kmem_cache *slab_execute_cbs;
} global;

static const char *i915_fence_get_driver_name(struct dma_fence *fence)
{
	return "i915";
}

static const char *i915_fence_get_timeline_name(struct dma_fence *fence)
{
	/*
	 * The timeline struct (as part of the ppgtt underneath a context)
	 * may be freed when the request is no longer in use by the GPU.
	 * We could extend the life of a context to beyond that of all
	 * fences, possibly keeping the hw resource around indefinitely,
	 * or we just give them a false name. Since
	 * dma_fence_ops.get_timeline_name is a debug feature, the occasional
	 * lie seems justifiable.
	 */
	if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags))
		return "signaled";

	return to_request(fence)->gem_context->name ?: "[i915]";
}

static bool i915_fence_signaled(struct dma_fence *fence)
{
	return i915_request_completed(to_request(fence));
}

static bool i915_fence_enable_signaling(struct dma_fence *fence)
{
	return i915_request_enable_breadcrumb(to_request(fence));
}

static signed long i915_fence_wait(struct dma_fence *fence,
				   bool interruptible,
				   signed long timeout)
{
	return i915_request_wait(to_request(fence),
				 interruptible | I915_WAIT_PRIORITY,
				 timeout);
}

static void i915_fence_release(struct dma_fence *fence)
{
	struct i915_request *rq = to_request(fence);

	/*
	 * The request is put onto a RCU freelist (i.e. the address
	 * is immediately reused), mark the fences as being freed now.
	 * Otherwise the debugobjects for the fences are only marked as
	 * freed when the slab cache itself is freed, and so we would get
	 * caught trying to reuse dead objects.
	 */
	i915_sw_fence_fini(&rq->submit);
	i915_sw_fence_fini(&rq->semaphore);

	kmem_cache_free(global.slab_requests, rq);
}

const struct dma_fence_ops i915_fence_ops = {
	.get_driver_name = i915_fence_get_driver_name,
	.get_timeline_name = i915_fence_get_timeline_name,
	.enable_signaling = i915_fence_enable_signaling,
	.signaled = i915_fence_signaled,
	.wait = i915_fence_wait,
	.release = i915_fence_release,
};
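
/*
 * rq->fence is initialised with i915_fence_ops in __i915_request_create(),
 * so the rest of the kernel can treat an i915_request as an ordinary
 * struct dma_fence (wait on it, aggregate it into a dma_fence_array, etc.).
 */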

static void irq_execute_cb(struct irq_work *wrk)
{
	struct execute_cb *cb = container_of(wrk, typeof(*cb), work);

	i915_sw_fence_complete(cb->fence);
	kmem_cache_free(global.slab_execute_cbs, cb);
}

static void irq_execute_cb_hook(struct irq_work *wrk)
{
	struct execute_cb *cb = container_of(wrk, typeof(*cb), work);

	cb->hook(container_of(cb->fence, struct i915_request, submit),
		 &cb->signal->fence);
	i915_request_put(cb->signal);

	irq_execute_cb(wrk);
}

static void __notify_execute_cb(struct i915_request *rq)
{
	struct execute_cb *cb;

	lockdep_assert_held(&rq->lock);

	if (list_empty(&rq->execute_cb))
		return;

	list_for_each_entry(cb, &rq->execute_cb, link)
		irq_work_queue(&cb->work);

	/*
	 * XXX Rollback on __i915_request_unsubmit()
	 *
	 * In the future, perhaps when we have an active time-slicing scheduler,
	 * it will be interesting to unsubmit parallel execution and remove
	 * busywaits from the GPU until their master is restarted. This is
	 * quite hairy, we have to carefully rollback the fence and do a
	 * preempt-to-idle cycle on the target engine, all the while the
	 * master execute_cb may refire.
	 */
	INIT_LIST_HEAD(&rq->execute_cb);
}

static inline void
i915_request_remove_from_client(struct i915_request *request)
{
	struct drm_i915_file_private *file_priv;

	file_priv = request->file_priv;
	if (!file_priv)
		return;

	spin_lock(&file_priv->mm.lock);
	if (request->file_priv) {
		list_del(&request->client_link);
		request->file_priv = NULL;
	}
	spin_unlock(&file_priv->mm.lock);
}

static void advance_ring(struct i915_request *request)
{
	struct intel_ring *ring = request->ring;
	unsigned int tail;

	/*
	 * We know the GPU must have read the request to have
	 * sent us the seqno + interrupt, so use the position
	 * of tail of the request to update the last known position
	 * of the GPU head.
	 *
	 * Note this requires that we are always called in request
	 * completion order.
	 */
	GEM_BUG_ON(!list_is_first(&request->ring_link, &ring->request_list));
	if (list_is_last(&request->ring_link, &ring->request_list)) {
		/*
		 * We may race here with execlists resubmitting this request
		 * as we retire it. The resubmission will move the ring->tail
		 * forwards (to request->wa_tail). We either read the
		 * current value that was written to hw, or the value that
		 * is just about to be. Either works, if we miss the last two
		 * noops - they are safe to be replayed on a reset.
		 */
		tail = READ_ONCE(request->tail);
		list_del(&ring->active_link);
	} else {
		tail = request->postfix;
	}
	list_del_init(&request->ring_link);

	ring->head = tail;
}

static void free_capture_list(struct i915_request *request)
{
	struct i915_capture_list *capture;

	capture = request->capture_list;
	while (capture) {
		struct i915_capture_list *next = capture->next;

		kfree(capture);
		capture = next;
	}
}

static bool i915_request_retire(struct i915_request *rq)
{
	struct i915_active_request *active, *next;

	lockdep_assert_held(&rq->i915->drm.struct_mutex);
	if (!i915_request_completed(rq))
		return false;

	GEM_TRACE("%s fence %llx:%lld, current %d\n",
		  rq->engine->name,
		  rq->fence.context, rq->fence.seqno,
		  hwsp_seqno(rq));

	GEM_BUG_ON(!i915_sw_fence_signaled(&rq->submit));
	trace_i915_request_retire(rq);

	advance_ring(rq);

	/*
	 * Walk through the active list, calling retire on each. This allows
	 * objects to track their GPU activity and mark themselves as idle
	 * when their *last* active request is completed (updating state
	 * tracking lists for eviction, active references for GEM, etc).
	 *
	 * As the ->retire() may free the node, we decouple it first and
	 * pass along the auxiliary information (to avoid dereferencing
	 * the node after the callback).
	 */
	list_for_each_entry_safe(active, next, &rq->active_list, link) {
		/*
		 * In microbenchmarks or focusing upon time inside the kernel,
		 * we may spend an inordinate amount of time simply handling
		 * the retirement of requests and processing their callbacks.
		 * Of which, this loop itself is particularly hot due to the
		 * cache misses when jumping around the list of
		 * i915_active_request. So we try to keep this loop as
		 * streamlined as possible and also prefetch the next
		 * i915_active_request to try and hide the likely cache miss.
		 */
		prefetchw(next);

		INIT_LIST_HEAD(&active->link);
		RCU_INIT_POINTER(active->request, NULL);

		active->retire(active, rq);
	}

	local_irq_disable();

	/*
	 * We only loosely track inflight requests across preemption,
	 * and so we may find ourselves attempting to retire a _completed_
	 * request that we have removed from the HW and put back on a run
	 * queue.
	 */
	spin_lock(&rq->engine->active.lock);
	list_del(&rq->sched.link);
	spin_unlock(&rq->engine->active.lock);

	spin_lock(&rq->lock);
	i915_request_mark_complete(rq);
	if (!i915_request_signaled(rq))
		dma_fence_signal_locked(&rq->fence);
	if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &rq->fence.flags))
		i915_request_cancel_breadcrumb(rq);
	if (i915_request_has_waitboost(rq)) {
		GEM_BUG_ON(!atomic_read(&rq->i915->gt_pm.rps.num_waiters));
		atomic_dec(&rq->i915->gt_pm.rps.num_waiters);
	}
	if (!test_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags)) {
		set_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags);
		__notify_execute_cb(rq);
	}
	GEM_BUG_ON(!list_empty(&rq->execute_cb));
	spin_unlock(&rq->lock);

	local_irq_enable();

	intel_context_exit(rq->hw_context);
	intel_context_unpin(rq->hw_context);

	i915_request_remove_from_client(rq);
	list_del(&rq->link);

	free_capture_list(rq);
	i915_sched_node_fini(&rq->sched);
	i915_request_put(rq);

	return true;
}

void i915_request_retire_upto(struct i915_request *rq)
{
	struct intel_ring *ring = rq->ring;
	struct i915_request *tmp;

	GEM_TRACE("%s fence %llx:%lld, current %d\n",
		  rq->engine->name,
		  rq->fence.context, rq->fence.seqno,
		  hwsp_seqno(rq));

	lockdep_assert_held(&rq->i915->drm.struct_mutex);
	GEM_BUG_ON(!i915_request_completed(rq));

	if (list_empty(&rq->ring_link))
		return;

	do {
		tmp = list_first_entry(&ring->request_list,
				       typeof(*tmp), ring_link);
	} while (i915_request_retire(tmp) && tmp != rq);
}
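
/*
 * Retirement relies on requests on a ring completing in order: both
 * i915_request_retire() and i915_request_retire_upto() run under
 * struct_mutex, i915_request_retire() must be called in completion order
 * (advance_ring() asserts the request is the oldest on its ring), and
 * i915_request_retire_upto() walks ring->request_list from the oldest
 * request until it has retired the given request.
 */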

static int
__i915_request_await_execution(struct i915_request *rq,
			       struct i915_request *signal,
			       void (*hook)(struct i915_request *rq,
					    struct dma_fence *signal),
			       gfp_t gfp)
{
	struct execute_cb *cb;

	if (i915_request_is_active(signal)) {
		if (hook)
			hook(rq, &signal->fence);
		return 0;
	}

	cb = kmem_cache_alloc(global.slab_execute_cbs, gfp);
	if (!cb)
		return -ENOMEM;

	cb->fence = &rq->submit;
	i915_sw_fence_await(cb->fence);
	init_irq_work(&cb->work, irq_execute_cb);

	if (hook) {
		cb->hook = hook;
		cb->signal = i915_request_get(signal);
		cb->work.func = irq_execute_cb_hook;
	}

	spin_lock_irq(&signal->lock);
	if (i915_request_is_active(signal)) {
		if (hook) {
			hook(rq, &signal->fence);
			i915_request_put(signal);
		}
		i915_sw_fence_complete(cb->fence);
		kmem_cache_free(global.slab_execute_cbs, cb);
	} else {
		list_add_tail(&cb->link, &signal->execute_cb);
	}
	spin_unlock_irq(&signal->lock);

	return 0;
}

void __i915_request_submit(struct i915_request *request)
{
	struct intel_engine_cs *engine = request->engine;

	GEM_TRACE("%s fence %llx:%lld, current %d\n",
		  engine->name,
		  request->fence.context, request->fence.seqno,
		  hwsp_seqno(request));

	GEM_BUG_ON(!irqs_disabled());
	lockdep_assert_held(&engine->active.lock);

	if (i915_gem_context_is_banned(request->gem_context))
		i915_request_skip(request, -EIO);

	/*
	 * Are we using semaphores when the gpu is already saturated?
	 *
	 * Using semaphores incurs a cost in having the GPU poll a
	 * memory location, busywaiting for it to change. The continual
	 * memory reads can have a noticeable impact on the rest of the
	 * system with the extra bus traffic, stalling the cpu as it too
	 * tries to access memory across the bus (perf stat -e bus-cycles).
	 *
	 * If we installed a semaphore on this request and we only submit
	 * the request after the signaler completed, that indicates the
	 * system is overloaded and using semaphores at this time only
	 * increases the amount of work we are doing. If so, we disable
	 * further use of semaphores until we are idle again, whence we
	 * optimistically try again.
	 */
	if (request->sched.semaphores &&
	    i915_sw_fence_signaled(&request->semaphore))
		engine->saturated |= request->sched.semaphores;

	/* We may be recursing from the signal callback of another i915 fence */
	spin_lock_nested(&request->lock, SINGLE_DEPTH_NESTING);

	list_move_tail(&request->sched.link, &engine->active.requests);

	GEM_BUG_ON(test_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags));
	set_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags);

	if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &request->fence.flags) &&
	    !test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &request->fence.flags) &&
	    !i915_request_enable_breadcrumb(request))
		intel_engine_queue_breadcrumbs(engine);

	__notify_execute_cb(request);

	spin_unlock(&request->lock);

	engine->emit_fini_breadcrumb(request,
				     request->ring->vaddr + request->postfix);

	engine->serial++;

	trace_i915_request_execute(request);
}

void i915_request_submit(struct i915_request *request)
{
	struct intel_engine_cs *engine = request->engine;
	unsigned long flags;

	/* Will be called from irq-context when using foreign fences. */
	spin_lock_irqsave(&engine->active.lock, flags);

	__i915_request_submit(request);

	spin_unlock_irqrestore(&engine->active.lock, flags);
}

void __i915_request_unsubmit(struct i915_request *request)
{
	struct intel_engine_cs *engine = request->engine;

	GEM_TRACE("%s fence %llx:%lld, current %d\n",
		  engine->name,
		  request->fence.context, request->fence.seqno,
		  hwsp_seqno(request));

	GEM_BUG_ON(!irqs_disabled());
	lockdep_assert_held(&engine->active.lock);

	/*
	 * Only unwind in reverse order, required so that the per-context list
	 * is kept in seqno/ring order.
	 */

	/* We may be recursing from the signal callback of another i915 fence */
	spin_lock_nested(&request->lock, SINGLE_DEPTH_NESTING);

	if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &request->fence.flags))
		i915_request_cancel_breadcrumb(request);

	GEM_BUG_ON(!test_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags));
	clear_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags);

	spin_unlock(&request->lock);

	/* We've already spun, don't charge on resubmitting. */
	if (request->sched.semaphores && i915_request_started(request)) {
		request->sched.attr.priority |= I915_PRIORITY_NOSEMAPHORE;
		request->sched.semaphores = 0;
	}

	/*
	 * We don't need to wake_up any waiters on request->execute, they
	 * will get woken by any other event or us re-adding this request
	 * to the engine timeline (__i915_request_submit()). The waiters
	 * should be quite adept at finding that the request now has a new
	 * global_seqno compared to the one they went to sleep on.
	 */
}

void i915_request_unsubmit(struct i915_request *request)
{
	struct intel_engine_cs *engine = request->engine;
	unsigned long flags;

	/* Will be called from irq-context when using foreign fences. */
	spin_lock_irqsave(&engine->active.lock, flags);

	__i915_request_unsubmit(request);

	spin_unlock_irqrestore(&engine->active.lock, flags);
}

static int __i915_sw_fence_call
submit_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state)
{
	struct i915_request *request =
		container_of(fence, typeof(*request), submit);

	switch (state) {
	case FENCE_COMPLETE:
		trace_i915_request_submit(request);
		/*
		 * We need to serialize use of the submit_request() callback
		 * with its hotplugging performed during an emergency
		 * i915_gem_set_wedged(). We use the RCU mechanism to mark the
		 * critical section in order to force i915_gem_set_wedged() to
		 * wait until the submit_request() is completed before
		 * proceeding.
		 */
		rcu_read_lock();
		request->engine->submit_request(request);
		rcu_read_unlock();
		break;

	case FENCE_FREE:
		i915_request_put(request);
		break;
	}

	return NOTIFY_DONE;
}

static int __i915_sw_fence_call
semaphore_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state)
{
	struct i915_request *request =
		container_of(fence, typeof(*request), semaphore);

	switch (state) {
	case FENCE_COMPLETE:
		i915_schedule_bump_priority(request, I915_PRIORITY_NOSEMAPHORE);
		break;

	case FENCE_FREE:
		i915_request_put(request);
		break;
	}

	return NOTIFY_DONE;
}

static void ring_retire_requests(struct intel_ring *ring)
{
	struct i915_request *rq, *rn;

	list_for_each_entry_safe(rq, rn, &ring->request_list, ring_link)
		if (!i915_request_retire(rq))
			break;
}

static noinline struct i915_request *
request_alloc_slow(struct intel_context *ce, gfp_t gfp)
{
	struct intel_ring *ring = ce->ring;
	struct i915_request *rq;

	if (list_empty(&ring->request_list))
		goto out;

	if (!gfpflags_allow_blocking(gfp))
		goto out;

	/* Move our oldest request to the slab-cache (if not in use!) */
	rq = list_first_entry(&ring->request_list, typeof(*rq), ring_link);
	i915_request_retire(rq);

	rq = kmem_cache_alloc(global.slab_requests,
			      gfp | __GFP_RETRY_MAYFAIL | __GFP_NOWARN);
	if (rq)
		return rq;

	/* Ratelimit ourselves to prevent oom from malicious clients */
	rq = list_last_entry(&ring->request_list, typeof(*rq), ring_link);
	cond_synchronize_rcu(rq->rcustate);

	/* Retire our old requests in the hope that we free some */
	ring_retire_requests(ring);

out:
	return kmem_cache_alloc(global.slab_requests, gfp);
}
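
/*
 * request_alloc_slow() is the fallback for when the request slab is under
 * pressure: retire the oldest request on this ring and retry a
 * reclaim-friendly allocation, then throttle the caller with
 * cond_synchronize_rcu() against the newest request and retire whatever has
 * completed before the final attempt with the caller's gfp flags.
 */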

struct i915_request *
__i915_request_create(struct intel_context *ce, gfp_t gfp)
{
	struct intel_timeline *tl = ce->ring->timeline;
	struct i915_request *rq;
	u32 seqno;
	int ret;

	might_sleep_if(gfpflags_allow_blocking(gfp));

	/* Check that the caller provided an already pinned context */
	__intel_context_pin(ce);

	/*
	 * Beware: Dragons be flying overhead.
	 *
	 * We use RCU to look up requests in flight. The lookups may
	 * race with the request being allocated from the slab freelist.
	 * That is, the request we are writing to here may be in the process
	 * of being read by __i915_active_request_get_rcu(). As such,
	 * we have to be very careful when overwriting the contents. During
	 * the RCU lookup, we chase the request->engine pointer,
	 * read the request->global_seqno and increment the reference count.
	 *
	 * The reference count is incremented atomically. If it is zero,
	 * the lookup knows the request is unallocated and complete. Otherwise,
	 * it is either still in use, or has been reallocated and reset
	 * with dma_fence_init(). This increment is safe for release as we
	 * check that the request we have a reference to matches the active
	 * request.
	 *
	 * Before we increment the refcount, we chase the request->engine
	 * pointer. We must not call kmem_cache_zalloc() or else we set
	 * that pointer to NULL and cause a crash during the lookup. If
	 * we see the request is completed (based on the value of the
	 * old engine and seqno), the lookup is complete and reports NULL.
	 * If we decide the request is not completed (new engine or seqno),
	 * then we grab a reference and double check that it is still the
	 * active request - which it won't be, and restart the lookup.
	 *
	 * Do not use kmem_cache_zalloc() here!
	 */
	rq = kmem_cache_alloc(global.slab_requests,
			      gfp | __GFP_RETRY_MAYFAIL | __GFP_NOWARN);
	if (unlikely(!rq)) {
		rq = request_alloc_slow(ce, gfp);
		if (!rq) {
			ret = -ENOMEM;
			goto err_unreserve;
		}
	}

	ret = intel_timeline_get_seqno(tl, rq, &seqno);
	if (ret)
		goto err_free;

	rq->i915 = ce->engine->i915;
	rq->hw_context = ce;
	rq->gem_context = ce->gem_context;
	rq->engine = ce->engine;
	rq->ring = ce->ring;
	rq->timeline = tl;
	rq->hwsp_seqno = tl->hwsp_seqno;
	rq->hwsp_cacheline = tl->hwsp_cacheline;
	rq->rcustate = get_state_synchronize_rcu(); /* acts as smp_mb() */

	spin_lock_init(&rq->lock);
	dma_fence_init(&rq->fence, &i915_fence_ops, &rq->lock,
		       tl->fence_context, seqno);

	/* We bump the ref for the fence chain */
	i915_sw_fence_init(&i915_request_get(rq)->submit, submit_notify);
	i915_sw_fence_init(&i915_request_get(rq)->semaphore, semaphore_notify);

	i915_sched_node_init(&rq->sched);

	/* No zalloc, must clear what we need by hand */
	rq->file_priv = NULL;
	rq->batch = NULL;
	rq->capture_list = NULL;
	rq->flags = 0;
	rq->execution_mask = ALL_ENGINES;

	INIT_LIST_HEAD(&rq->active_list);
	INIT_LIST_HEAD(&rq->execute_cb);

	/*
	 * Reserve space in the ring buffer for all the commands required to
	 * eventually emit this request. This is to guarantee that the
	 * i915_request_add() call can't fail. Note that the reserve may need
	 * to be redone if the request is not actually submitted straight
	 * away, e.g. because a GPU scheduler has deferred it.
	 *
	 * Note that due to how we add reserved_space to intel_ring_begin()
	 * we need to double our request to ensure that if we need to wrap
	 * around inside i915_request_add() there is sufficient space at
	 * the beginning of the ring as well.
	 */
	rq->reserved_space =
		2 * rq->engine->emit_fini_breadcrumb_dw * sizeof(u32);

	/*
	 * Record the position of the start of the request so that
	 * should we detect the updated seqno part-way through the
	 * GPU processing the request, we never over-estimate the
	 * position of the head.
	 */
	rq->head = rq->ring->emit;

	ret = rq->engine->request_alloc(rq);
	if (ret)
		goto err_unwind;

	rq->infix = rq->ring->emit; /* end of header; start of user payload */

	intel_context_mark_active(ce);
	return rq;

err_unwind:
	ce->ring->emit = rq->head;

	/* Make sure we didn't add ourselves to external state before freeing */
	GEM_BUG_ON(!list_empty(&rq->active_list));
	GEM_BUG_ON(!list_empty(&rq->sched.signalers_list));
	GEM_BUG_ON(!list_empty(&rq->sched.waiters_list));

err_free:
	kmem_cache_free(global.slab_requests, rq);
err_unreserve:
	intel_context_unpin(ce);
	return ERR_PTR(ret);
}

struct i915_request *
i915_request_create(struct intel_context *ce)
{
	struct i915_request *rq;
	int err;

	err = intel_context_timeline_lock(ce);
	if (err)
		return ERR_PTR(err);

	/* Move our oldest request to the slab-cache (if not in use!) */
	rq = list_first_entry(&ce->ring->request_list, typeof(*rq), ring_link);
	if (!list_is_last(&rq->ring_link, &ce->ring->request_list))
		i915_request_retire(rq);

	intel_context_enter(ce);
	rq = __i915_request_create(ce, GFP_KERNEL);
	intel_context_exit(ce); /* active reference transferred to request */
	if (IS_ERR(rq))
		goto err_unlock;

	/* Check that we do not interrupt ourselves with a new request */
	rq->cookie = lockdep_pin_lock(&ce->ring->timeline->mutex);

	return rq;

err_unlock:
	intel_context_timeline_unlock(ce);
	return rq;
}
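
/*
 * Illustrative usage (a sketch, not code taken from this file): a caller
 * with a pinned intel_context typically builds and submits a request as
 *
 *	rq = i915_request_create(ce);
 *	if (IS_ERR(rq))
 *		return PTR_ERR(rq);
 *
 *	cs = intel_ring_begin(rq, n);	(emit n dwords of commands)
 *	...
 *	intel_ring_advance(rq, cs);
 *
 *	i915_request_add(rq);
 *
 * i915_request_create() returns with the context's timeline mutex held (and
 * pinned by lockdep); i915_request_add() drops it.
 */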

static int
i915_request_await_start(struct i915_request *rq, struct i915_request *signal)
{
	if (list_is_first(&signal->ring_link, &signal->ring->request_list))
		return 0;

	signal = list_prev_entry(signal, ring_link);
	if (intel_timeline_sync_is_later(rq->timeline, &signal->fence))
		return 0;

	return i915_sw_fence_await_dma_fence(&rq->submit,
					     &signal->fence, 0,
					     I915_FENCE_GFP);
}

static intel_engine_mask_t
already_busywaiting(struct i915_request *rq)
{
	/*
	 * Polling a semaphore causes bus traffic, delaying other users of
	 * both the GPU and CPU. We want to limit the impact on others,
	 * while taking advantage of early submission to reduce GPU
	 * latency. Therefore we restrict ourselves to not using more
	 * than one semaphore from each source, and not using a semaphore
	 * if we have detected the engine is saturated (i.e. would not be
	 * submitted early and cause bus traffic reading an already passed
	 * semaphore).
	 *
	 * See the are-we-too-late? check in __i915_request_submit().
	 */
	return rq->sched.semaphores | rq->engine->saturated;
}

static int
emit_semaphore_wait(struct i915_request *to,
		    struct i915_request *from,
		    gfp_t gfp)
{
	u32 hwsp_offset;
	u32 *cs;
	int err;

	GEM_BUG_ON(!from->timeline->has_initial_breadcrumb);
	GEM_BUG_ON(INTEL_GEN(to->i915) < 8);

	/* Just emit the first semaphore we see as request space is limited. */
	if (already_busywaiting(to) & from->engine->mask)
		return i915_sw_fence_await_dma_fence(&to->submit,
						     &from->fence, 0,
						     I915_FENCE_GFP);

	err = i915_request_await_start(to, from);
	if (err < 0)
		return err;

	/* Only submit our spinner after the signaler is running! */
	err = __i915_request_await_execution(to, from, NULL, gfp);
	if (err)
		return err;

	/* We need to pin the signaler's HWSP until we are finished reading. */
	err = intel_timeline_read_hwsp(from, to, &hwsp_offset);
	if (err)
		return err;

	cs = intel_ring_begin(to, 4);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	/*
	 * Using greater-than-or-equal here means we have to worry
	 * about seqno wraparound. To side step that issue, we swap
	 * the timeline HWSP upon wrapping, so that everyone listening
	 * for the old (pre-wrap) values does not see much smaller
	 * (post-wrap) values than they were expecting (and so would
	 * wait forever).
	 */
	*cs++ = MI_SEMAPHORE_WAIT |
		MI_SEMAPHORE_GLOBAL_GTT |
		MI_SEMAPHORE_POLL |
		MI_SEMAPHORE_SAD_GTE_SDD;
	*cs++ = from->fence.seqno;
	*cs++ = hwsp_offset;
	*cs++ = 0;

	intel_ring_advance(to, cs);
	to->sched.semaphores |= from->engine->mask;
	to->sched.flags |= I915_SCHED_HAS_SEMAPHORE_CHAIN;
	return 0;
}

static int
i915_request_await_request(struct i915_request *to, struct i915_request *from)
{
	int ret;

	GEM_BUG_ON(to == from);
	GEM_BUG_ON(to->timeline == from->timeline);

	if (i915_request_completed(from))
		return 0;

	if (to->engine->schedule) {
		ret = i915_sched_node_add_dependency(&to->sched, &from->sched);
		if (ret < 0)
			return ret;
	}

	if (to->engine == from->engine) {
		ret = i915_sw_fence_await_sw_fence_gfp(&to->submit,
						       &from->submit,
						       I915_FENCE_GFP);
	} else if (intel_engine_has_semaphores(to->engine) &&
		   to->gem_context->sched.priority >= I915_PRIORITY_NORMAL) {
		ret = emit_semaphore_wait(to, from, I915_FENCE_GFP);
	} else {
		ret = i915_sw_fence_await_dma_fence(&to->submit,
						    &from->fence, 0,
						    I915_FENCE_GFP);
	}
	if (ret < 0)
		return ret;

	if (to->sched.flags & I915_SCHED_HAS_SEMAPHORE_CHAIN) {
		ret = i915_sw_fence_await_dma_fence(&to->semaphore,
						    &from->fence, 0,
						    I915_FENCE_GFP);
		if (ret < 0)
			return ret;
	}

	return 0;
}

int
i915_request_await_dma_fence(struct i915_request *rq, struct dma_fence *fence)
{
	struct dma_fence **child = &fence;
	unsigned int nchild = 1;
	int ret;

	/*
	 * Note that if the fence-array was created in signal-on-any mode,
	 * we should *not* decompose it into its individual fences. However,
	 * we don't currently store which mode the fence-array is operating
	 * in. Fortunately, the only user of signal-on-any is private to
	 * amdgpu and we should not see any incoming fence-array from
	 * sync-file being in signal-on-any mode.
	 */
	if (dma_fence_is_array(fence)) {
		struct dma_fence_array *array = to_dma_fence_array(fence);

		child = array->fences;
		nchild = array->num_fences;
		GEM_BUG_ON(!nchild);
	}

	do {
		fence = *child++;
		if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags))
			continue;

		/*
		 * Requests on the same timeline are explicitly ordered, along
		 * with their dependencies, by i915_request_add() which ensures
		 * that requests are submitted in-order through each ring.
		 */
		if (fence->context == rq->fence.context)
			continue;

		/* Squash repeated waits to the same timelines */
		if (fence->context != rq->i915->mm.unordered_timeline &&
		    intel_timeline_sync_is_later(rq->timeline, fence))
			continue;

		if (dma_fence_is_i915(fence))
			ret = i915_request_await_request(rq, to_request(fence));
		else
			ret = i915_sw_fence_await_dma_fence(&rq->submit, fence,
							    I915_FENCE_TIMEOUT,
							    I915_FENCE_GFP);
		if (ret < 0)
			return ret;

		/* Record the latest fence used against each timeline */
		if (fence->context != rq->i915->mm.unordered_timeline)
			intel_timeline_sync_set(rq->timeline, fence);
	} while (--nchild);

	return 0;
}

int
i915_request_await_execution(struct i915_request *rq,
			     struct dma_fence *fence,
			     void (*hook)(struct i915_request *rq,
					  struct dma_fence *signal))
{
	struct dma_fence **child = &fence;
	unsigned int nchild = 1;
	int ret;

	if (dma_fence_is_array(fence)) {
		struct dma_fence_array *array = to_dma_fence_array(fence);

		/* XXX Error for signal-on-any fence arrays */

		child = array->fences;
		nchild = array->num_fences;
		GEM_BUG_ON(!nchild);
	}

	do {
		fence = *child++;
		if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags))
			continue;

		/*
		 * We don't squash repeated fence dependencies here as we
		 * want to run our callback in all cases.
		 */

		if (dma_fence_is_i915(fence))
			ret = __i915_request_await_execution(rq,
							     to_request(fence),
							     hook,
							     I915_FENCE_GFP);
		else
			ret = i915_sw_fence_await_dma_fence(&rq->submit, fence,
							    I915_FENCE_TIMEOUT,
							    GFP_KERNEL);
		if (ret < 0)
			return ret;
	} while (--nchild);

	return 0;
}
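
/*
 * Note the distinction between the two await paths above:
 * i915_request_await_dma_fence() orders the request behind *completion* of
 * the given fences, while i915_request_await_execution() only orders it
 * behind the point at which an i915 signaler is submitted to hardware,
 * invoking the optional hook at that moment (non-i915 fences fall back to
 * completion waits).
 */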

/**
 * i915_request_await_object - set this request to (async) wait upon a bo
 * @to: request we are wishing to use
 * @obj: object which may be in use on another ring.
 * @write: whether the wait is on behalf of a writer
 *
 * This code is meant to abstract object synchronization with the GPU.
 * Conceptually we serialise writes between engines inside the GPU.
 * We only allow one engine to write into a buffer at any time, but
 * multiple readers. To ensure each has a coherent view of memory, we must:
 *
 * - If there is an outstanding write request to the object, the new
 *   request must wait for it to complete (either CPU or in hw, requests
 *   on the same ring will be naturally ordered).
 *
 * - If we are a write request (pending_write_domain is set), the new
 *   request must wait for outstanding read requests to complete.
 *
 * Returns 0 if successful, else propagates up the lower layer error.
 */
int
i915_request_await_object(struct i915_request *to,
			  struct drm_i915_gem_object *obj,
			  bool write)
{
	struct dma_fence *excl;
	int ret = 0;

	if (write) {
		struct dma_fence **shared;
		unsigned int count, i;

		ret = reservation_object_get_fences_rcu(obj->base.resv,
							&excl, &count, &shared);
		if (ret)
			return ret;

		for (i = 0; i < count; i++) {
			ret = i915_request_await_dma_fence(to, shared[i]);
			if (ret)
				break;

			dma_fence_put(shared[i]);
		}

		for (; i < count; i++)
			dma_fence_put(shared[i]);
		kfree(shared);
	} else {
		excl = reservation_object_get_excl_rcu(obj->base.resv);
	}

	if (excl) {
		if (ret == 0)
			ret = i915_request_await_dma_fence(to, excl);

		dma_fence_put(excl);
	}

	return ret;
}

void i915_request_skip(struct i915_request *rq, int error)
{
	void *vaddr = rq->ring->vaddr;
	u32 head;

	GEM_BUG_ON(!IS_ERR_VALUE((long)error));
	dma_fence_set_error(&rq->fence, error);

	/*
	 * As this request likely depends on state from the lost
	 * context, clear out all the user operations leaving the
	 * breadcrumb at the end (so we get the fence notifications).
	 */
	head = rq->infix;
	if (rq->postfix < head) {
		memset(vaddr + head, 0, rq->ring->size - head);
		head = 0;
	}
	memset(vaddr + head, 0, rq->postfix - head);
}

static struct i915_request *
__i915_request_add_to_timeline(struct i915_request *rq)
{
	struct intel_timeline *timeline = rq->timeline;
	struct i915_request *prev;

	/*
	 * Dependency tracking and request ordering along the timeline
	 * is special cased so that we can eliminate redundant ordering
	 * operations while building the request (we know that the timeline
	 * itself is ordered, and here we guarantee it).
	 *
	 * As we know we will need to emit tracking along the timeline,
	 * we embed the hooks into our request struct -- at the cost of
	 * having to have specialised no-allocation interfaces (which will
	 * be beneficial elsewhere).
	 *
	 * A second benefit to open-coding i915_request_await_request is
	 * that we can apply a slight variant of the rules specialised
	 * for timelines that jump between engines (such as virtual engines).
	 * If we consider the case of virtual engine, we must emit a dma-fence
	 * to prevent scheduling of the second request until the first is
	 * complete (to maximise our greedy late load balancing) and this
	 * precludes optimising to use semaphores serialisation of a single
	 * timeline across engines.
	 */
	prev = rcu_dereference_protected(timeline->last_request.request, 1);
	if (prev && !i915_request_completed(prev)) {
		if (is_power_of_2(prev->engine->mask | rq->engine->mask))
			i915_sw_fence_await_sw_fence(&rq->submit,
						     &prev->submit,
						     &rq->submitq);
		else
			__i915_sw_fence_await_dma_fence(&rq->submit,
							&prev->fence,
							&rq->dmaq);
		if (rq->engine->schedule)
			__i915_sched_node_add_dependency(&rq->sched,
							 &prev->sched,
							 &rq->dep,
							 0);
	}

	list_add_tail(&rq->link, &timeline->requests);

	/*
	 * Make sure that no request gazumped us - if it was allocated after
	 * our i915_request_alloc() and called __i915_request_add() before
	 * us, the timeline will hold its seqno which is later than ours.
	 */
	GEM_BUG_ON(timeline->seqno != rq->fence.seqno);
	__i915_active_request_set(&timeline->last_request, rq);

	return prev;
}

/*
 * NB: This function is not allowed to fail. Doing so would mean that the
 * request is not being tracked for completion but the work itself is
 * going to happen on the hardware. This would be a Bad Thing(tm).
 */
struct i915_request *__i915_request_commit(struct i915_request *rq)
{
	struct intel_engine_cs *engine = rq->engine;
	struct intel_ring *ring = rq->ring;
	struct i915_request *prev;
	u32 *cs;

	GEM_TRACE("%s fence %llx:%lld\n",
		  engine->name, rq->fence.context, rq->fence.seqno);

	/*
	 * To ensure that this call will not fail, space for its emissions
	 * should already have been reserved in the ring buffer. Let the ring
	 * know that it is time to use that space up.
	 */
	GEM_BUG_ON(rq->reserved_space > ring->space);
	rq->reserved_space = 0;

	/*
	 * Record the position of the start of the breadcrumb so that
	 * should we detect the updated seqno part-way through the
	 * GPU processing the request, we never over-estimate the
	 * position of the ring's HEAD.
	 */
	cs = intel_ring_begin(rq, engine->emit_fini_breadcrumb_dw);
	GEM_BUG_ON(IS_ERR(cs));
	rq->postfix = intel_ring_offset(rq, cs);

	prev = __i915_request_add_to_timeline(rq);

	list_add_tail(&rq->ring_link, &ring->request_list);
	if (list_is_first(&rq->ring_link, &ring->request_list))
		list_add(&ring->active_link, &rq->i915->gt.active_rings);
	rq->emitted_jiffies = jiffies;

	/*
	 * Let the backend know a new request has arrived that may need
	 * to adjust the existing execution schedule due to a high priority
	 * request - i.e. we may want to preempt the current request in order
	 * to run a high priority dependency chain *before* we can execute this
	 * request.
	 *
	 * This is called before the request is ready to run so that we can
	 * decide whether to preempt the entire chain so that it is ready to
	 * run at the earliest possible convenience.
	 */
	local_bh_disable();
	i915_sw_fence_commit(&rq->semaphore);
	if (engine->schedule) {
		struct i915_sched_attr attr = rq->gem_context->sched;

		/*
		 * Boost actual workloads past semaphores!
		 *
		 * With semaphores we spin on one engine waiting for another,
		 * simply to reduce the latency of starting our work when
		 * the signaler completes. However, if there is any other
		 * work that we could be doing on this engine instead, that
		 * is better utilisation and will reduce the overall duration
		 * of the current work. To avoid PI boosting a semaphore
		 * far in the distance past over useful work, we keep a history
		 * of any semaphore use along our dependency chain.
		 */
		if (!(rq->sched.flags & I915_SCHED_HAS_SEMAPHORE_CHAIN))
			attr.priority |= I915_PRIORITY_NOSEMAPHORE;

		/*
		 * Boost priorities to new clients (new request flows).
		 *
		 * Allow interactive/synchronous clients to jump ahead of
		 * the bulk clients. (FQ_CODEL)
		 */
		if (list_empty(&rq->sched.signalers_list))
			attr.priority |= I915_PRIORITY_WAIT;

		engine->schedule(rq, &attr);
	}
	i915_sw_fence_commit(&rq->submit);
	local_bh_enable(); /* Kick the execlists tasklet if just scheduled */

	return prev;
}

void i915_request_add(struct i915_request *rq)
{
	struct i915_request *prev;

	lockdep_assert_held(&rq->timeline->mutex);
	lockdep_unpin_lock(&rq->timeline->mutex, rq->cookie);

	trace_i915_request_add(rq);

	prev = __i915_request_commit(rq);

	/*
	 * In typical scenarios, we do not expect the previous request on
	 * the timeline to be still tracked by timeline->last_request if it
	 * has been completed. If the completed request is still here, that
	 * implies that request retirement is a long way behind submission,
	 * suggesting that we haven't been retiring frequently enough from
	 * the combination of retire-before-alloc, waiters and the background
	 * retirement worker. So if the last request on this timeline was
	 * already completed, do a catch up pass, flushing the retirement queue
	 * up to this client. Since we have now moved the heaviest operations
	 * during retirement onto secondary workers, such as freeing objects
	 * or contexts, retiring a bunch of requests is mostly list management
	 * (and cache misses), and so we should not be overly penalizing this
	 * client by performing excess work, though we may still be performing
	 * work on behalf of others -- but instead we should benefit from
	 * improved resource management. (Well, that's the theory at least.)
	 */
	if (prev && i915_request_completed(prev))
		i915_request_retire_upto(prev);

	mutex_unlock(&rq->timeline->mutex);
}

static unsigned long local_clock_us(unsigned int *cpu)
{
	unsigned long t;

	/*
	 * Cheaply and approximately convert from nanoseconds to microseconds.
	 * The result and subsequent calculations are also defined in the same
	 * approximate microseconds units. The principal source of timing
	 * error here is from the simple truncation.
	 *
	 * Note that local_clock() is only defined wrt to the current CPU;
	 * the comparisons are no longer valid if we switch CPUs. Instead of
	 * blocking preemption for the entire busywait, we can detect the CPU
	 * switch and use that as indicator of system load and a reason to
	 * stop busywaiting, see busywait_stop().
	 */
	*cpu = get_cpu();
	t = local_clock() >> 10;
	put_cpu();

	return t;
}

static bool busywait_stop(unsigned long timeout, unsigned int cpu)
{
	unsigned int this_cpu;

	if (time_after(local_clock_us(&this_cpu), timeout))
		return true;

	return this_cpu != cpu;
}

static bool __i915_spin_request(const struct i915_request * const rq,
				int state, unsigned long timeout_us)
{
	unsigned int cpu;

	/*
	 * Only wait for the request if we know it is likely to complete.
	 *
	 * We don't track the timestamps around requests, nor the average
	 * request length, so we do not have a good indicator that this
	 * request will complete within the timeout. What we do know is the
	 * order in which requests are executed by the context and so we can
	 * tell if the request has been started. If the request is not even
	 * running yet, it is a fair assumption that it will not complete
	 * within our relatively short timeout.
	 */
	if (!i915_request_is_running(rq))
		return false;

	/*
	 * When waiting for high frequency requests, e.g. during synchronous
	 * rendering split between the CPU and GPU, the finite amount of time
	 * required to set up the irq and wait upon it limits the response
	 * rate. By busywaiting on the request completion for a short while we
	 * can service the high frequency waits as quick as possible. However,
	 * if it is a slow request, we want to sleep as quickly as possible.
	 * The tradeoff between waiting and sleeping is roughly the time it
	 * takes to sleep on a request, on the order of a microsecond.
	 */

	timeout_us += local_clock_us(&cpu);
	do {
		if (i915_request_completed(rq))
			return true;

		if (signal_pending_state(state, current))
			break;

		if (busywait_stop(timeout_us, cpu))
			break;

		cpu_relax();
	} while (!need_resched());

	return false;
}

struct request_wait {
	struct dma_fence_cb cb;
	struct task_struct *tsk;
};

static void request_wait_wake(struct dma_fence *fence, struct dma_fence_cb *cb)
{
	struct request_wait *wait = container_of(cb, typeof(*wait), cb);

	wake_up_process(wait->tsk);
}
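
/*
 * i915_request_wait() below first tries the optimistic busywait above and
 * then sleeps on an ordinary dma_fence callback: request_wait_wake() simply
 * wakes the waiting task, and the loop in i915_request_wait() re-checks
 * completion, pending signals and the remaining timeout.
 */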

/**
 * i915_request_wait - wait until execution of request has finished
 * @rq: the request to wait upon
 * @flags: how to wait
 * @timeout: how long to wait in jiffies
 *
 * i915_request_wait() waits for the request to be completed, for a
 * maximum of @timeout jiffies (with MAX_SCHEDULE_TIMEOUT implying an
 * unbounded wait).
 *
 * Returns the remaining time (in jiffies) if the request completed, which may
 * be zero or -ETIME if the request is unfinished after the timeout expires.
 * May return -EINTR if called with I915_WAIT_INTERRUPTIBLE and a signal is
 * pending before the request completes.
 */
long i915_request_wait(struct i915_request *rq,
		       unsigned int flags,
		       long timeout)
{
	const int state = flags & I915_WAIT_INTERRUPTIBLE ?
		TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE;
	struct request_wait wait;

	might_sleep();
	GEM_BUG_ON(timeout < 0);

	if (dma_fence_is_signaled(&rq->fence))
		return timeout;

	if (!timeout)
		return -ETIME;

	trace_i915_request_wait_begin(rq, flags);

	/*
	 * We must never wait on the GPU while holding a lock as we
	 * may need to perform a GPU reset. So while we don't need to
	 * serialise wait/reset with an explicit lock, we do want
	 * lockdep to detect potential dependency cycles.
	 */
	mutex_acquire(&rq->engine->gt->reset.mutex.dep_map, 0, 0, _THIS_IP_);

	/*
	 * Optimistic spin before touching IRQs.
	 *
	 * We may use a rather large value here to offset the penalty of
	 * switching away from the active task. Frequently, the client will
	 * wait upon an old swapbuffer to throttle itself to remain within a
	 * frame of the gpu. If the client is running in lockstep with the gpu,
	 * then it should not be waiting long at all, and a sleep now will incur
	 * extra scheduler latency in producing the next frame. To try to
	 * avoid adding the cost of enabling/disabling the interrupt to the
	 * short wait, we first spin to see if the request would have completed
	 * in the time taken to set up the interrupt.
	 *
	 * We need up to 5us to enable the irq, and up to 20us to hide the
	 * scheduler latency of a context switch, ignoring the secondary
	 * impacts from a context switch such as cache eviction.
	 *
	 * The scheme used for low-latency IO is called "hybrid interrupt
	 * polling". The suggestion there is to sleep until just before you
	 * expect to be woken by the device interrupt and then poll for its
	 * completion. That requires having a good predictor for the request
	 * duration, which we currently lack.
	 */
	if (CONFIG_DRM_I915_SPIN_REQUEST &&
	    __i915_spin_request(rq, state, CONFIG_DRM_I915_SPIN_REQUEST)) {
		dma_fence_signal(&rq->fence);
		goto out;
	}

	/*
	 * This client is about to stall waiting for the GPU. In many cases
	 * this is undesirable and limits the throughput of the system, as
	 * many clients cannot continue processing user input/output whilst
	 * blocked. RPS autotuning may take tens of milliseconds to respond
	 * to the GPU load and thus incurs additional latency for the client.
	 * We can circumvent that by promoting the GPU frequency to maximum
	 * before we sleep. This makes the GPU throttle up much more quickly
	 * (good for benchmarks and user experience, e.g. window animations),
	 * but at a cost of spending more power processing the workload
	 * (bad for battery).
	 */
	if (flags & I915_WAIT_PRIORITY) {
		if (!i915_request_started(rq) && INTEL_GEN(rq->i915) >= 6)
			gen6_rps_boost(rq);
		i915_schedule_bump_priority(rq, I915_PRIORITY_WAIT);
	}

	wait.tsk = current;
	if (dma_fence_add_callback(&rq->fence, &wait.cb, request_wait_wake))
		goto out;

	for (;;) {
		set_current_state(state);

		if (i915_request_completed(rq)) {
			dma_fence_signal(&rq->fence);
			break;
		}

		if (signal_pending_state(state, current)) {
			timeout = -ERESTARTSYS;
			break;
		}

		if (!timeout) {
			timeout = -ETIME;
			break;
		}

		timeout = io_schedule_timeout(timeout);
	}
	__set_current_state(TASK_RUNNING);

	dma_fence_remove_callback(&rq->fence, &wait.cb);

out:
	mutex_release(&rq->engine->gt->reset.mutex.dep_map, 0, _THIS_IP_);
	trace_i915_request_wait_end(rq);
	return timeout;
}

bool i915_retire_requests(struct drm_i915_private *i915)
{
	struct intel_ring *ring, *tmp;

	lockdep_assert_held(&i915->drm.struct_mutex);

	list_for_each_entry_safe(ring, tmp,
				 &i915->gt.active_rings, active_link) {
		intel_ring_get(ring); /* last rq holds reference! */
		ring_retire_requests(ring);
		intel_ring_put(ring);
	}

	return !list_empty(&i915->gt.active_rings);
}

#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftests/mock_request.c"
#include "selftests/i915_request.c"
#endif

static void i915_global_request_shrink(void)
{
	kmem_cache_shrink(global.slab_dependencies);
	kmem_cache_shrink(global.slab_execute_cbs);
	kmem_cache_shrink(global.slab_requests);
}

static void i915_global_request_exit(void)
{
	kmem_cache_destroy(global.slab_dependencies);
	kmem_cache_destroy(global.slab_execute_cbs);
	kmem_cache_destroy(global.slab_requests);
}

static struct i915_global_request global = { {
	.shrink = i915_global_request_shrink,
	.exit = i915_global_request_exit,
} };

int __init i915_global_request_init(void)
{
	global.slab_requests = KMEM_CACHE(i915_request,
					  SLAB_HWCACHE_ALIGN |
					  SLAB_RECLAIM_ACCOUNT |
					  SLAB_TYPESAFE_BY_RCU);
	if (!global.slab_requests)
		return -ENOMEM;

	global.slab_execute_cbs = KMEM_CACHE(execute_cb,
					     SLAB_HWCACHE_ALIGN |
					     SLAB_RECLAIM_ACCOUNT |
					     SLAB_TYPESAFE_BY_RCU);
	if (!global.slab_execute_cbs)
		goto err_requests;

	global.slab_dependencies = KMEM_CACHE(i915_dependency,
					      SLAB_HWCACHE_ALIGN |
					      SLAB_RECLAIM_ACCOUNT);
	if (!global.slab_dependencies)
		goto err_execute_cbs;

	i915_global_register(&global.base);
	return 0;

err_execute_cbs:
	kmem_cache_destroy(global.slab_execute_cbs);
err_requests:
	kmem_cache_destroy(global.slab_requests);
	return -ENOMEM;
}