drivers/gpu/drm/i915/i915_gem_request.c

   1 /*
   2  * Copyright © 2008-2015 Intel Corporation
   3  *
   4  * Permission is hereby granted, free of charge, to any person obtaining a
   5  * copy of this software and associated documentation files (the "Software"),
   6  * to deal in the Software without restriction, including without limitation
   7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   8  * and/or sell copies of the Software, and to permit persons to whom the
   9  * Software is furnished to do so, subject to the following conditions:
  10  *
  11  * The above copyright notice and this permission notice (including the next
  12  * paragraph) shall be included in all copies or substantial portions of the
  13  * Software.
  14  *
  15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  21  * IN THE SOFTWARE.
  22  *
  23  */
  24
  25 #include "i915_drv.h"
  26
  27 static const char *i915_fence_get_driver_name(struct fence *fence)
  28 {
  29         return "i915";
  30 }
  31
  32 static const char *i915_fence_get_timeline_name(struct fence *fence)
  33 {
  34         /* Timelines are bound by eviction to a VM. However, since
  35          * we only have a global seqno at the moment, we only have
  36          * a single timeline. Note that each timeline will have
  37          * multiple execution contexts (fence contexts) as we allow
  38          * engines within a single timeline to execute in parallel.
  39          */
  40         return "global";
  41 }
  42
  43 static bool i915_fence_signaled(struct fence *fence)
  44 {
  45         return i915_gem_request_completed(to_request(fence));
  46 }
  47
  48 static bool i915_fence_enable_signaling(struct fence *fence)
  49 {
  50         if (i915_fence_signaled(fence))
  51                 return false;
  52
  53         intel_engine_enable_signaling(to_request(fence));
  54         return true;
  55 }
  56
  57 static signed long i915_fence_wait(struct fence *fence,
  58                                    bool interruptible,
  59                                    signed long timeout_jiffies)
  60 {
  61         s64 timeout_ns, *timeout;
  62         int ret;
  63
  64         if (timeout_jiffies != MAX_SCHEDULE_TIMEOUT) {
  65                 timeout_ns = jiffies_to_nsecs(timeout_jiffies);
  66                 timeout = &timeout_ns;
  67         } else {
  68                 timeout = NULL;
  69         }
  70
  71         ret = __i915_wait_request(to_request(fence),
  72                                   interruptible, timeout,
  73                                   NO_WAITBOOST);
  74         if (ret == -ETIME)
  75                 return 0;
  76
  77         if (ret < 0)
  78                 return ret;
  79
  80         if (timeout_jiffies != MAX_SCHEDULE_TIMEOUT)
  81                 timeout_jiffies = nsecs_to_jiffies(timeout_ns);
  82
  83         return timeout_jiffies;
  84 }
  85
  86 static void i915_fence_value_str(struct fence *fence, char *str, int size)
  87 {
  88         snprintf(str, size, "%u", fence->seqno);
  89 }
  90
  91 static void i915_fence_timeline_value_str(struct fence *fence, char *str,
  92                                           int size)
  93 {
  94         snprintf(str, size, "%u",
  95                  intel_engine_get_seqno(to_request(fence)->engine));
  96 }
  97
  98 static void i915_fence_release(struct fence *fence)
  99 {
 100         struct drm_i915_gem_request *req = to_request(fence);
 101
 102         kmem_cache_free(req->i915->requests, req);
 103 }
 104
 105 const struct fence_ops i915_fence_ops = {
 106         .get_driver_name = i915_fence_get_driver_name,
 107         .get_timeline_name = i915_fence_get_timeline_name,
 108         .enable_signaling = i915_fence_enable_signaling,
 109         .signaled = i915_fence_signaled,
 110         .wait = i915_fence_wait,
 111         .release = i915_fence_release,
 112         .fence_value_str = i915_fence_value_str,
 113         .timeline_value_str = i915_fence_timeline_value_str,
 114 };
 115
 116 int i915_gem_request_add_to_client(struct drm_i915_gem_request *req,
 117                                    struct drm_file *file)
 118 {
 119         struct drm_i915_private *dev_private;
 120         struct drm_i915_file_private *file_priv;
 121
 122         WARN_ON(!req || !file || req->file_priv);
 123
 124         if (!req || !file)
 125                 return -EINVAL;
 126
 127         if (req->file_priv)
 128                 return -EINVAL;
 129
 130         dev_private = req->i915;
 131         file_priv = file->driver_priv;
 132
 133         spin_lock(&file_priv->mm.lock);
 134         req->file_priv = file_priv;
 135         list_add_tail(&req->client_list, &file_priv->mm.request_list);
 136         spin_unlock(&file_priv->mm.lock);
 137
 138         req->pid = get_pid(task_pid(current));
 139
 140         return 0;
 141 }
 142
 143 static inline void
 144 i915_gem_request_remove_from_client(struct drm_i915_gem_request *request)
 145 {
 146         struct drm_i915_file_private *file_priv = request->file_priv;
 147
 148         if (!file_priv)
 149                 return;
 150
 151         spin_lock(&file_priv->mm.lock);
 152         list_del(&request->client_list);
 153         request->file_priv = NULL;
 154         spin_unlock(&file_priv->mm.lock);
 155
 156         put_pid(request->pid);
 157         request->pid = NULL;
 158 }
 159
 160 static void i915_gem_request_retire(struct drm_i915_gem_request *request)
 161 {
 162         trace_i915_gem_request_retire(request);
 163         list_del_init(&request->list);
 164
 165         /* We know the GPU must have read the request to have
 166          * sent us the seqno + interrupt, so use the position
 167          * of tail of the request to update the last known position
 168          * of the GPU head.
 169          *
 170          * Note this requires that we are always called in request
 171          * completion order.
 172          */
 173         request->ring->last_retired_head = request->postfix;
 174
 175         i915_gem_request_remove_from_client(request);
 176
 177         if (request->previous_context) {
 178                 if (i915.enable_execlists)
 179                         intel_lr_context_unpin(request->previous_context,
 180                                                request->engine);
 181         }
 182
 183         i915_gem_context_put(request->ctx);
 184         i915_gem_request_put(request);
 185 }
 186
 187 void i915_gem_request_retire_upto(struct drm_i915_gem_request *req)
 188 {
 189         struct intel_engine_cs *engine = req->engine;
 190         struct drm_i915_gem_request *tmp;
 191
 192         lockdep_assert_held(&req->i915->drm.struct_mutex);
 193
 194         if (list_empty(&req->list))
 195                 return;
 196
 197         do {
 198                 tmp = list_first_entry(&engine->request_list,
 199                                        typeof(*tmp), list);
 200
 201                 i915_gem_request_retire(tmp);
 202         } while (tmp != req);
 203
 204         WARN_ON(i915_verify_lists(engine->dev));
 205 }
 206
 207 static int i915_gem_check_wedge(unsigned int reset_counter, bool interruptible)
 208 {
 209         if (__i915_terminally_wedged(reset_counter))
 210                 return -EIO;
 211
 212         if (__i915_reset_in_progress(reset_counter)) {
 213                 /* Non-interruptible callers can't handle -EAGAIN, hence return
 214                  * -EIO unconditionally for these.
 215                  */
 216                 if (!interruptible)
 217                         return -EIO;
 218
 219                 return -EAGAIN;
 220         }
 221
 222         return 0;
 223 }
 224
 225 static int i915_gem_init_seqno(struct drm_i915_private *dev_priv, u32 seqno)
 226 {
 227         struct intel_engine_cs *engine;
 228         int ret;
 229
 230         /* Carefully retire all requests without writing to the rings */
 231         for_each_engine(engine, dev_priv) {
 232                 ret = intel_engine_idle(engine);
 233                 if (ret)
 234                         return ret;
 235         }
 236         i915_gem_retire_requests(dev_priv);
 237
 238         /* If the seqno wraps around, we need to clear the breadcrumb rbtree */
 239         if (!i915_seqno_passed(seqno, dev_priv->next_seqno)) {
 240                 while (intel_kick_waiters(dev_priv) ||
 241                        intel_kick_signalers(dev_priv))
 242                         yield();
 243         }
 244
 245         /* Finally reset hw state */
 246         for_each_engine(engine, dev_priv)
 247                 intel_engine_init_seqno(engine, seqno);
 248
 249         return 0;
 250 }
 251
 252 int i915_gem_set_seqno(struct drm_device *dev, u32 seqno)
 253 {
 254         struct drm_i915_private *dev_priv = to_i915(dev);
 255         int ret;
 256
 257         if (seqno == 0)
 258                 return -EINVAL;
 259
 260         /* HWS page needs to be set less than what we
 261          * will inject to ring
 262          */
 263         ret = i915_gem_init_seqno(dev_priv, seqno - 1);
 264         if (ret)
 265                 return ret;
 266
 267         /* Carefully set the last_seqno value so that wrap
 268          * detection still works
 269          */
 270         dev_priv->next_seqno = seqno;
 271         dev_priv->last_seqno = seqno - 1;
 272         if (dev_priv->last_seqno == 0)
 273                 dev_priv->last_seqno--;
 274
 275         return 0;
 276 }
 277
 278 static int i915_gem_get_seqno(struct drm_i915_private *dev_priv, u32 *seqno)
 279 {
 280         /* reserve 0 for non-seqno */
 281         if (unlikely(dev_priv->next_seqno == 0)) {
 282                 int ret;
 283
 284                 ret = i915_gem_init_seqno(dev_priv, 0);
 285                 if (ret)
 286                         return ret;
 287
 288                 dev_priv->next_seqno = 1;
 289         }
 290
 291         *seqno = dev_priv->last_seqno = dev_priv->next_seqno++;
 292         return 0;
 293 }
 294
 295 static inline int
 296 __i915_gem_request_alloc(struct intel_engine_cs *engine,
 297                          struct i915_gem_context *ctx,
 298                          struct drm_i915_gem_request **req_out)
 299 {
 300         struct drm_i915_private *dev_priv = engine->i915;
 301         unsigned int reset_counter = i915_reset_counter(&dev_priv->gpu_error);
 302         struct drm_i915_gem_request *req;
 303         u32 seqno;
 304         int ret;
 305
 306         if (!req_out)
 307                 return -EINVAL;
 308
 309         *req_out = NULL;
 310
 311         /* ABI: Before userspace accesses the GPU (e.g. execbuffer), report
 312          * EIO if the GPU is already wedged, or EAGAIN to drop the struct_mutex
 313          * and restart.
 314          */
 315         ret = i915_gem_check_wedge(reset_counter, dev_priv->mm.interruptible);
 316         if (ret)
 317                 return ret;
 318
 319         /* Move the oldest request to the slab-cache (if not in use!) */
 320         req = list_first_entry_or_null(&engine->request_list,
 321                                        typeof(*req), list);
 322         if (req && i915_gem_request_completed(req))
 323                 i915_gem_request_retire(req);
 324
 325         req = kmem_cache_zalloc(dev_priv->requests, GFP_KERNEL);
 326         if (!req)
 327                 return -ENOMEM;
 328
 329         ret = i915_gem_get_seqno(dev_priv, &seqno);
 330         if (ret)
 331                 goto err;
 332
 333         spin_lock_init(&req->lock);
 334         fence_init(&req->fence,
 335                    &i915_fence_ops,
 336                    &req->lock,
 337                    engine->fence_context,
 338                    seqno);
 339
 340         req->i915 = dev_priv;
 341         req->engine = engine;
 342         req->ctx = i915_gem_context_get(ctx);
 343
 344         /*
 345          * Reserve space in the ring buffer for all the commands required to
 346          * eventually emit this request. This is to guarantee that the
 347          * i915_add_request() call can't fail. Note that the reserve may need
 348          * to be redone if the request is not actually submitted straight
 349          * away, e.g. because a GPU scheduler has deferred it.
 350          */
 351         req->reserved_space = MIN_SPACE_FOR_ADD_REQUEST;
 352
 353         if (i915.enable_execlists)
 354                 ret = intel_logical_ring_alloc_request_extras(req);
 355         else
 356                 ret = intel_ring_alloc_request_extras(req);
 357         if (ret)
 358                 goto err_ctx;
 359
 360         *req_out = req;
 361         return 0;
 362
 363 err_ctx:
 364         i915_gem_context_put(ctx);
 365 err:
 366         kmem_cache_free(dev_priv->requests, req);
 367         return ret;
 368 }
 369
 370 /**
 371  * i915_gem_request_alloc - allocate a request structure
 372  *
 373  * @engine: engine that we wish to issue the request on.
 374  * @ctx: context that the request will be associated with.
 375  *       This can be NULL if the request is not directly related to
 376  *       any specific user context, in which case this function will
 377  *       choose an appropriate context to use.
 378  *
 379  * Returns a pointer to the allocated request if successful,
 380  * or an error code if not.
 381  */
 382 struct drm_i915_gem_request *
 383 i915_gem_request_alloc(struct intel_engine_cs *engine,
 384                        struct i915_gem_context *ctx)
 385 {
 386         struct drm_i915_gem_request *req;
 387         int err;
 388
 389         if (!ctx)
 390                 ctx = engine->i915->kernel_context;
 391         err = __i915_gem_request_alloc(engine, ctx, &req);
 392         return err ? ERR_PTR(err) : req;
 393 }
 394
 395 static void i915_gem_mark_busy(const struct intel_engine_cs *engine)
 396 {
 397         struct drm_i915_private *dev_priv = engine->i915;
 398
 399         dev_priv->gt.active_engines |= intel_engine_flag(engine);
 400         if (dev_priv->gt.awake)
 401                 return;
 402
 403         intel_runtime_pm_get_noresume(dev_priv);
 404         dev_priv->gt.awake = true;
 405
 406         intel_enable_gt_powersave(dev_priv);
 407         i915_update_gfx_val(dev_priv);
 408         if (INTEL_GEN(dev_priv) >= 6)
 409                 gen6_rps_busy(dev_priv);
 410
 411         queue_delayed_work(dev_priv->wq,
 412                            &dev_priv->gt.retire_work,
 413                            round_jiffies_up_relative(HZ));
 414 }
 415
 416 /*
 417  * NB: This function is not allowed to fail. Doing so would mean the the
 418  * request is not being tracked for completion but the work itself is
 419  * going to happen on the hardware. This would be a Bad Thing(tm).
 420  */
 421 void __i915_add_request(struct drm_i915_gem_request *request,
 422                         struct drm_i915_gem_object *obj,
 423                         bool flush_caches)
 424 {
 425         struct intel_engine_cs *engine;
 426         struct intel_ring *ring;
 427         u32 request_start;
 428         u32 reserved_tail;
 429         int ret;
 430
 431         if (WARN_ON(!request))
 432                 return;
 433
 434         engine = request->engine;
 435         ring = request->ring;
 436
 437         /*
 438          * To ensure that this call will not fail, space for its emissions
 439          * should already have been reserved in the ring buffer. Let the ring
 440          * know that it is time to use that space up.
 441          */
 442         request_start = intel_ring_get_tail(ring);
 443         reserved_tail = request->reserved_space;
 444         request->reserved_space = 0;
 445
 446         /*
 447          * Emit any outstanding flushes - execbuf can fail to emit the flush
 448          * after having emitted the batchbuffer command. Hence we need to fix
 449          * things up similar to emitting the lazy request. The difference here
 450          * is that the flush _must_ happen before the next request, no matter
 451          * what.
 452          */
 453         if (flush_caches) {
 454                 if (i915.enable_execlists)
 455                         ret = logical_ring_flush_all_caches(request);
 456                 else
 457                         ret = intel_engine_flush_all_caches(request);
 458                 /* Not allowed to fail! */
 459                 WARN(ret, "*_ring_flush_all_caches failed: %d!\n", ret);
 460         }
 461
 462         trace_i915_gem_request_add(request);
 463
 464         request->head = request_start;
 465
 466         /* Whilst this request exists, batch_obj will be on the
 467          * active_list, and so will hold the active reference. Only when this
 468          * request is retired will the the batch_obj be moved onto the
 469          * inactive_list and lose its active reference. Hence we do not need
 470          * to explicitly hold another reference here.
 471          */
 472         request->batch_obj = obj;
 473
 474         /* Seal the request and mark it as pending execution. Note that
 475          * we may inspect this state, without holding any locks, during
 476          * hangcheck. Hence we apply the barrier to ensure that we do not
 477          * see a more recent value in the hws than we are tracking.
 478          */
 479         request->emitted_jiffies = jiffies;
 480         request->previous_seqno = engine->last_submitted_seqno;
 481         smp_store_mb(engine->last_submitted_seqno, request->fence.seqno);
 482         list_add_tail(&request->list, &engine->request_list);
 483
 484         /* Record the position of the start of the request so that
 485          * should we detect the updated seqno part-way through the
 486          * GPU processing the request, we never over-estimate the
 487          * position of the head.
 488          */
 489         request->postfix = intel_ring_get_tail(ring);
 490
 491         if (i915.enable_execlists) {
 492                 ret = engine->emit_request(request);
 493         } else {
 494                 ret = engine->add_request(request);
 495
 496                 request->tail = intel_ring_get_tail(ring);
 497         }
 498         /* Not allowed to fail! */
 499         WARN(ret, "emit|add_request failed: %d!\n", ret);
 500         /* Sanity check that the reserved size was large enough. */
 501         ret = intel_ring_get_tail(ring) - request_start;
 502         if (ret < 0)
 503                 ret += ring->size;
 504         WARN_ONCE(ret > reserved_tail,
 505                   "Not enough space reserved (%d bytes) "
 506                   "for adding the request (%d bytes)\n",
 507                   reserved_tail, ret);
 508
 509         i915_gem_mark_busy(engine);
 510 }
 511
 512 static unsigned long local_clock_us(unsigned int *cpu)
 513 {
 514         unsigned long t;
 515
 516         /* Cheaply and approximately convert from nanoseconds to microseconds.
 517          * The result and subsequent calculations are also defined in the same
 518          * approximate microseconds units. The principal source of timing
 519          * error here is from the simple truncation.
 520          *
 521          * Note that local_clock() is only defined wrt to the current CPU;
 522          * the comparisons are no longer valid if we switch CPUs. Instead of
 523          * blocking preemption for the entire busywait, we can detect the CPU
 524          * switch and use that as indicator of system load and a reason to
 525          * stop busywaiting, see busywait_stop().
 526          */
 527         *cpu = get_cpu();
 528         t = local_clock() >> 10;
 529         put_cpu();
 530
 531         return t;
 532 }
 533
 534 static bool busywait_stop(unsigned long timeout, unsigned int cpu)
 535 {
 536         unsigned int this_cpu;
 537
 538         if (time_after(local_clock_us(&this_cpu), timeout))
 539                 return true;
 540
 541         return this_cpu != cpu;
 542 }
 543
 544 bool __i915_spin_request(const struct drm_i915_gem_request *req,
 545                          int state, unsigned long timeout_us)
 546 {
 547         unsigned int cpu;
 548
 549         /* When waiting for high frequency requests, e.g. during synchronous
 550          * rendering split between the CPU and GPU, the finite amount of time
 551          * required to set up the irq and wait upon it limits the response
 552          * rate. By busywaiting on the request completion for a short while we
 553          * can service the high frequency waits as quick as possible. However,
 554          * if it is a slow request, we want to sleep as quickly as possible.
 555          * The tradeoff between waiting and sleeping is roughly the time it
 556          * takes to sleep on a request, on the order of a microsecond.
 557          */
 558
 559         timeout_us += local_clock_us(&cpu);
 560         do {
 561                 if (i915_gem_request_completed(req))
 562                         return true;
 563
 564                 if (signal_pending_state(state, current))
 565                         break;
 566
 567                 if (busywait_stop(timeout_us, cpu))
 568                         break;
 569
 570                 cpu_relax_lowlatency();
 571         } while (!need_resched());
 572
 573         return false;
 574 }
 575
 576 /**
 577  * __i915_wait_request - wait until execution of request has finished
 578  * @req: duh!
 579  * @interruptible: do an interruptible wait (normally yes)
 580  * @timeout: in - how long to wait (NULL forever); out - how much time remaining
 581  * @rps: client to charge for RPS boosting
 582  *
 583  * Note: It is of utmost importance that the passed in seqno and reset_counter
 584  * values have been read by the caller in an smp safe manner. Where read-side
 585  * locks are involved, it is sufficient to read the reset_counter before
 586  * unlocking the lock that protects the seqno. For lockless tricks, the
 587  * reset_counter _must_ be read before, and an appropriate smp_rmb must be
 588  * inserted.
 589  *
 590  * Returns 0 if the request was found within the alloted time. Else returns the
 591  * errno with remaining time filled in timeout argument.
 592  */
 593 int __i915_wait_request(struct drm_i915_gem_request *req,
 594                         bool interruptible,
 595                         s64 *timeout,
 596                         struct intel_rps_client *rps)
 597 {
 598         int state = interruptible ? TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE;
 599         DEFINE_WAIT(reset);
 600         struct intel_wait wait;
 601         unsigned long timeout_remain;
 602         int ret = 0;
 603
 604         might_sleep();
 605
 606         if (list_empty(&req->list))
 607                 return 0;
 608
 609         if (i915_gem_request_completed(req))
 610                 return 0;
 611
 612         timeout_remain = MAX_SCHEDULE_TIMEOUT;
 613         if (timeout) {
 614                 if (WARN_ON(*timeout < 0))
 615                         return -EINVAL;
 616
 617                 if (*timeout == 0)
 618                         return -ETIME;
 619
 620                 /* Record current time in case interrupted, or wedged */
 621                 timeout_remain = nsecs_to_jiffies_timeout(*timeout);
 622                 *timeout += ktime_get_raw_ns();
 623         }
 624
 625         trace_i915_gem_request_wait_begin(req);
 626
 627         /* This client is about to stall waiting for the GPU. In many cases
 628          * this is undesirable and limits the throughput of the system, as
 629          * many clients cannot continue processing user input/output whilst
 630          * blocked. RPS autotuning may take tens of milliseconds to respond
 631          * to the GPU load and thus incurs additional latency for the client.
 632          * We can circumvent that by promoting the GPU frequency to maximum
 633          * before we wait. This makes the GPU throttle up much more quickly
 634          * (good for benchmarks and user experience, e.g. window animations),
 635          * but at a cost of spending more power processing the workload
 636          * (bad for battery). Not all clients even want their results
 637          * immediately and for them we should just let the GPU select its own
 638          * frequency to maximise efficiency. To prevent a single client from
 639          * forcing the clocks too high for the whole system, we only allow
 640          * each client to waitboost once in a busy period.
 641          */
 642         if (IS_RPS_CLIENT(rps) && INTEL_GEN(req->i915) >= 6)
 643                 gen6_rps_boost(req->i915, rps, req->emitted_jiffies);
 644
 645         /* Optimistic spin for the next ~jiffie before touching IRQs */
 646         if (i915_spin_request(req, state, 5))
 647                 goto complete;
 648
 649         set_current_state(state);
 650         add_wait_queue(&req->i915->gpu_error.wait_queue, &reset);
 651
 652         intel_wait_init(&wait, req->fence.seqno);
 653         if (intel_engine_add_wait(req->engine, &wait))
 654                 /* In order to check that we haven't missed the interrupt
 655                  * as we enabled it, we need to kick ourselves to do a
 656                  * coherent check on the seqno before we sleep.
 657                  */
 658                 goto wakeup;
 659
 660         for (;;) {
 661                 if (signal_pending_state(state, current)) {
 662                         ret = -ERESTARTSYS;
 663                         break;
 664                 }
 665
 666                 timeout_remain = io_schedule_timeout(timeout_remain);
 667                 if (timeout_remain == 0) {
 668                         ret = -ETIME;
 669                         break;
 670                 }
 671
 672                 if (intel_wait_complete(&wait))
 673                         break;
 674
 675                 set_current_state(state);
 676
 677 wakeup:
 678                 /* Carefully check if the request is complete, giving time
 679                  * for the seqno to be visible following the interrupt.
 680                  * We also have to check in case we are kicked by the GPU
 681                  * reset in order to drop the struct_mutex.
 682                  */
 683                 if (__i915_request_irq_complete(req))
 684                         break;
 685
 686                 /* Only spin if we know the GPU is processing this request */
 687                 if (i915_spin_request(req, state, 2))
 688                         break;
 689         }
 690         remove_wait_queue(&req->i915->gpu_error.wait_queue, &reset);
 691
 692         intel_engine_remove_wait(req->engine, &wait);
 693         __set_current_state(TASK_RUNNING);
 694 complete:
 695         trace_i915_gem_request_wait_end(req);
 696
 697         if (timeout) {
 698                 *timeout -= ktime_get_raw_ns();
 699                 if (*timeout < 0)
 700                         *timeout = 0;
 701
 702                 /*
 703                  * Apparently ktime isn't accurate enough and occasionally has a
 704                  * bit of mismatch in the jiffies<->nsecs<->ktime loop. So patch
 705                  * things up to make the test happy. We allow up to 1 jiffy.
 706                  *
 707                  * This is a regrssion from the timespec->ktime conversion.
 708                  */
 709                 if (ret == -ETIME && *timeout < jiffies_to_usecs(1)*1000)
 710                         *timeout = 0;
 711         }
 712
 713         if (IS_RPS_USER(rps) &&
 714             req->fence.seqno == req->engine->last_submitted_seqno) {
 715                 /* The GPU is now idle and this client has stalled.
 716                  * Since no other client has submitted a request in the
 717                  * meantime, assume that this client is the only one
 718                  * supplying work to the GPU but is unable to keep that
 719                  * work supplied because it is waiting. Since the GPU is
 720                  * then never kept fully busy, RPS autoclocking will
 721                  * keep the clocks relatively low, causing further delays.
 722                  * Compensate by giving the synchronous client credit for
 723                  * a waitboost next time.
 724                  */
 725                 spin_lock(&req->i915->rps.client_lock);
 726                 list_del_init(&rps->link);
 727                 spin_unlock(&req->i915->rps.client_lock);
 728         }
 729
 730         return ret;
 731 }
 732
 733 /**
 734  * Waits for a request to be signaled, and cleans up the
 735  * request and object lists appropriately for that event.
 736  */
 737 int i915_wait_request(struct drm_i915_gem_request *req)
 738 {
 739         int ret;
 740
 741         GEM_BUG_ON(!req);
 742         lockdep_assert_held(&req->i915->drm.struct_mutex);
 743
 744         ret = __i915_wait_request(req, req->i915->mm.interruptible, NULL, NULL);
 745         if (ret)
 746                 return ret;
 747
 748         /* If the GPU hung, we want to keep the requests to find the guilty. */
 749         if (!i915_reset_in_progress(&req->i915->gpu_error))
 750                 i915_gem_request_retire_upto(req);
 751
 752         return 0;
 753 }