/*
 * Copyright © 2008-2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#ifndef I915_GEM_REQUEST_H
#define I915_GEM_REQUEST_H

#include <linux/dma-fence.h>

#include "i915_gem.h"
#include "i915_sw_fence.h"

struct drm_i915_gem_object;
struct drm_i915_gem_request;

struct intel_wait {
	struct rb_node node;
	struct task_struct *tsk;
	struct drm_i915_gem_request *request;
	u32 seqno;
};

struct intel_signal_node {
	struct rb_node node;
	struct intel_wait wait;
};

struct i915_dependency {
	struct i915_priotree *signaler;
	struct list_head signal_link;
	struct list_head wait_link;
	struct list_head dfs_link;
	unsigned long flags;
#define I915_DEPENDENCY_ALLOC BIT(0)
};

/* Requests exist in a complex web of interdependencies. Each request
 * has to wait for some other request to complete before it is ready to be run
 * (e.g. we have to wait until the pixels have been rendered into a texture
 * before we can copy from it). We track the readiness of a request in terms
 * of fences, but we also need to keep the dependency tree for the lifetime
 * of the request (beyond the life of an individual fence). We use the tree
 * at various points to reorder the requests whilst keeping the requests
 * in order with respect to their various dependencies.
 */
struct i915_priotree {
	struct list_head signalers_list; /* those before us, we depend upon */
	struct list_head waiters_list; /* those after us, they depend upon us */
	struct rb_node node;
	int priority;
#define I915_PRIORITY_MAX 1024
#define I915_PRIORITY_NORMAL 0
#define I915_PRIORITY_MIN (-I915_PRIORITY_MAX)
};

struct i915_gem_capture_list {
	struct i915_gem_capture_list *next;
	struct i915_vma *vma;
};

/**
 * Request queue structure.
 *
 * The request queue allows us to note sequence numbers that have been emitted
 * and may be associated with active buffers to be retired.
 *
 * By keeping this list, we can avoid having to do questionable sequence
 * number comparisons on buffer last_read|write_seqno. It also allows an
 * emission time to be associated with the request for tracking how far ahead
 * of the GPU the submission is.
 *
 * When modifying this structure be very aware that we perform a lockless
 * RCU lookup of it that may race against reallocation of the struct
 * from the slab freelist. We intentionally do not zero the structure on
 * allocation so that the lookup can use the dangling pointers (and is
 * cognisant that those pointers may be wrong). Instead, everything that
 * needs to be initialised must be done so explicitly.
 *
 * The requests are reference counted.
 */
struct drm_i915_gem_request {
	struct dma_fence fence;
	spinlock_t lock;

	/** On which ring this request was generated */
	struct drm_i915_private *i915;

	/**
	 * Context and ring buffer related to this request
	 * Contexts are refcounted, so when this request is associated with a
	 * context, we must increment the context's refcount, to guarantee that
	 * it persists while any request is linked to it. Requests themselves
	 * are also refcounted, so the request will only be freed when the last
	 * reference to it is dismissed, and the code in
	 * i915_gem_request_free() will then decrement the refcount on the
	 * context.
	 */
	struct i915_gem_context *ctx;
	struct intel_engine_cs *engine;
	struct intel_ring *ring;
	struct intel_timeline *timeline;
	struct intel_signal_node signaling;

	/* Fences for the various phases in the request's lifetime.
	 *
	 * The submit fence is used to await upon all of the request's
	 * dependencies. When it is signaled, the request is ready to run.
	 * It is used by the driver to then queue the request for execution.
	 */
	struct i915_sw_fence submit;
	wait_queue_t submitq;
	wait_queue_head_t execute;

	/* A list of everyone we wait upon, and everyone who waits upon us.
	 * Even though we will not be submitted to the hardware before the
	 * submit fence is signaled (it waits for all external events as well
	 * as our own requests), the scheduler still needs to know the
	 * dependency tree for the lifetime of the request (from execbuf
	 * to retirement), i.e. bidirectional dependency information for the
	 * request not tied to individual fences.
	 */
	struct i915_priotree priotree;
	struct i915_dependency dep;

	/** GEM sequence number associated with this request on the
	 * global execution timeline. It is zero when the request is not
	 * on the HW queue (i.e. not on the engine timeline list).
	 * Its value is guarded by the timeline spinlock.
	 */
	u32 global_seqno;

	/** Position in the ring of the start of the request */
	u32 head;

	/**
	 * Position in the ring of the start of the postfix.
	 * This is required to calculate the maximum available ring space
	 * without overwriting the postfix.
	 */
	u32 postfix;

	/** Position in the ring of the end of the whole request */
	u32 tail;

	/** Position in the ring of the end of any workarounds after the tail */
	u32 wa_tail;

	/** Preallocate space in the ring for emitting the request */
	u32 reserved_space;

	/** Batch buffer related to this request if any (used for
	 * error state dump only).
	 */
	struct i915_vma *batch;
	/** Additional buffers requested by userspace to be captured upon
	 * a GPU hang. The vma/obj on this list are protected by their
	 * active reference - all objects on this list must also be
	 * on the active_list (of their final request).
	 */
	struct i915_gem_capture_list *capture_list;
	struct list_head active_list;

	/** Time at which this request was emitted, in jiffies. */
	unsigned long emitted_jiffies;

	/** engine->request_list entry for this request */
	struct list_head link;

	/** ring->request_list entry for this request */
	struct list_head ring_link;

	struct drm_i915_file_private *file_priv;
	/** file_priv list entry for this request */
	struct list_head client_link;
};

extern const struct dma_fence_ops i915_fence_ops;

static inline bool dma_fence_is_i915(const struct dma_fence *fence)
{
	return fence->ops == &i915_fence_ops;
}

struct drm_i915_gem_request * __must_check
i915_gem_request_alloc(struct intel_engine_cs *engine,
		       struct i915_gem_context *ctx);
void i915_gem_request_retire_upto(struct drm_i915_gem_request *req);

static inline struct drm_i915_gem_request *
to_request(struct dma_fence *fence)
{
	/* We assume that NULL fence/request are interoperable */
	BUILD_BUG_ON(offsetof(struct drm_i915_gem_request, fence) != 0);
	GEM_BUG_ON(fence && !dma_fence_is_i915(fence));
	return container_of(fence, struct drm_i915_gem_request, fence);
}

static inline struct drm_i915_gem_request *
i915_gem_request_get(struct drm_i915_gem_request *req)
{
	return to_request(dma_fence_get(&req->fence));
}

static inline struct drm_i915_gem_request *
i915_gem_request_get_rcu(struct drm_i915_gem_request *req)
{
	return to_request(dma_fence_get_rcu(&req->fence));
}

static inline void
i915_gem_request_put(struct drm_i915_gem_request *req)
{
	dma_fence_put(&req->fence);
}

static inline void i915_gem_request_assign(struct drm_i915_gem_request **pdst,
					   struct drm_i915_gem_request *src)
{
	if (src)
		i915_gem_request_get(src);

	if (*pdst)
		i915_gem_request_put(*pdst);

	*pdst = src;
}

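/* Illustrative sketch (not part of the original header): the assign helper
 * exchanges a tracked request pointer, taking a reference on the new request
 * before dropping the reference on the old one. For a hypothetical cached
 * pointer:
 *
 *	i915_gem_request_assign(&cache->rq, rq);	(starts tracking rq)
 *	...
 *	i915_gem_request_assign(&cache->rq, NULL);	(drops the reference)
 */
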
/**
 * i915_gem_request_global_seqno - report the current global seqno
 * @request - the request
 *
 * A request is assigned a global seqno only when it is on the hardware
 * execution queue. The global seqno can be used to maintain a list of
 * requests on the same engine in retirement order, for example for
 * constructing a priority queue for waiting. Prior to its execution, or
 * if it is subsequently removed in the event of preemption, its global
 * seqno is zero. As both insertion and removal from the execution queue
 * may operate in IRQ context, it is not guarded by the usual struct_mutex
 * BKL. Instead those relying on the global seqno must be prepared for its
 * value to change between reads. Only when the request is complete can
 * the global seqno be stable (due to the memory barriers on submitting
 * the commands to the hardware to write the breadcrumb: if the HWS shows
 * that it has passed the global seqno and the global seqno is unchanged
 * after the read, it is indeed complete).
 */
static inline u32
i915_gem_request_global_seqno(const struct drm_i915_gem_request *request)
{
	return READ_ONCE(request->global_seqno);
}

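/* Illustrative note (not part of the original header): the stable-read
 * protocol described above is the one implemented by
 * __i915_gem_request_completed() later in this header:
 *
 *	seqno = i915_gem_request_global_seqno(req);
 *	if (i915_seqno_passed(intel_engine_get_seqno(req->engine), seqno) &&
 *	    seqno == i915_gem_request_global_seqno(req))
 *		(the request is genuinely complete)
 */
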
int
i915_gem_request_await_object(struct drm_i915_gem_request *to,
			      struct drm_i915_gem_object *obj,
			      bool write);
int i915_gem_request_await_dma_fence(struct drm_i915_gem_request *req,
				     struct dma_fence *fence);

void __i915_add_request(struct drm_i915_gem_request *req, bool flush_caches);
#define i915_add_request(req) \
	__i915_add_request(req, false)

void __i915_gem_request_submit(struct drm_i915_gem_request *request);
void i915_gem_request_submit(struct drm_i915_gem_request *request);

void __i915_gem_request_unsubmit(struct drm_i915_gem_request *request);
void i915_gem_request_unsubmit(struct drm_i915_gem_request *request);

struct intel_rps_client;
#define NO_WAITBOOST ERR_PTR(-1)
#define IS_RPS_CLIENT(p) (!IS_ERR(p))
#define IS_RPS_USER(p) (!IS_ERR_OR_NULL(p))

long i915_wait_request(struct drm_i915_gem_request *req,
		       unsigned int flags,
		       long timeout)
	__attribute__((nonnull(1)));
#define I915_WAIT_INTERRUPTIBLE BIT(0)
#define I915_WAIT_LOCKED BIT(1) /* struct_mutex held, handle GPU reset */
#define I915_WAIT_ALL BIT(2) /* used by i915_gem_object_wait() */

static inline u32 intel_engine_get_seqno(struct intel_engine_cs *engine);

/**
 * Returns true if seq1 is later than seq2.
 */
static inline bool i915_seqno_passed(u32 seq1, u32 seq2)
{
	return (s32)(seq1 - seq2) >= 0;
}

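/* Illustrative worked example (not part of the original header): the signed
 * subtraction handles seqno wraparound. With seq1 = 2 and seq2 = 0xfffffffe,
 * seq1 - seq2 = 4 as a u32, so (s32)4 >= 0 and seq1 is correctly reported
 * as later, even though it is numerically smaller. Conversely, with
 * seq1 = 0xfffffffe and seq2 = 2, the difference is (s32)-4 and the test
 * fails. The comparison is only meaningful whilst the two seqnos remain
 * within 2^31 of each other.
 */
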
static inline bool
__i915_gem_request_started(const struct drm_i915_gem_request *req, u32 seqno)
{
	GEM_BUG_ON(!seqno);
	return i915_seqno_passed(intel_engine_get_seqno(req->engine),
				 seqno - 1);
}

static inline bool
i915_gem_request_started(const struct drm_i915_gem_request *req)
{
	u32 seqno;

	seqno = i915_gem_request_global_seqno(req);
	if (!seqno)
		return false;

	return __i915_gem_request_started(req, seqno);
}

static inline bool
__i915_gem_request_completed(const struct drm_i915_gem_request *req, u32 seqno)
{
	GEM_BUG_ON(!seqno);
	return i915_seqno_passed(intel_engine_get_seqno(req->engine), seqno) &&
	       seqno == i915_gem_request_global_seqno(req);
}

static inline bool
i915_gem_request_completed(const struct drm_i915_gem_request *req)
{
	u32 seqno;

	seqno = i915_gem_request_global_seqno(req);
	if (!seqno)
		return false;

	return __i915_gem_request_completed(req, seqno);
}

bool __i915_spin_request(const struct drm_i915_gem_request *request,
			 u32 seqno, int state, unsigned long timeout_us);
static inline bool i915_spin_request(const struct drm_i915_gem_request *request,
				     int state, unsigned long timeout_us)
{
	u32 seqno;

	seqno = i915_gem_request_global_seqno(request);
	if (!seqno)
		return 0;

	return (__i915_gem_request_started(request, seqno) &&
		__i915_spin_request(request, seqno, state, timeout_us));
}

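/* Illustrative sketch (not part of the original header): a waiter would
 * typically try a short opportunistic busy-wait before committing to a
 * full sleeping wait, e.g.
 *
 *	if (!i915_spin_request(request, TASK_INTERRUPTIBLE, 5))
 *		timeout = i915_wait_request(request,
 *					    I915_WAIT_INTERRUPTIBLE,
 *					    MAX_SCHEDULE_TIMEOUT);
 *
 * where the 5us spin budget is an arbitrary value chosen for this example.
 */
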
/* We treat requests as fences. This is not to be confused with our
 * "fence registers" but pipeline synchronisation objects ala GL_ARB_sync.
 * We use the fences to synchronize access from the CPU with activity on the
 * GPU, for example, we should not rewrite an object's PTE whilst the GPU
 * is reading them. We also track fences at a higher level to provide
 * implicit synchronisation around GEM objects, e.g. set-domain will wait
 * for outstanding GPU rendering before marking the object ready for CPU
 * access, or a pageflip will wait until the GPU is complete before showing
 * the frame on the scanout.
 *
 * In order to use a fence, the object must track the fence it needs to
 * serialise with. For example, GEM objects want to track both read and
 * write access so that we can perform concurrent read operations between
 * the CPU and GPU engines, as well as waiting for all rendering to
 * complete, or waiting for the last GPU user of a "fence register". The
 * object then embeds a #i915_gem_active to track the most recent (in
 * retirement order) request relevant for the desired mode of access.
 * The #i915_gem_active is updated with i915_gem_active_set() to track the
 * most recent fence request, typically this is done as part of
 * i915_vma_move_to_active().
 *
 * When the #i915_gem_active completes (is retired), it will
 * signal its completion to the owner through a callback as well as mark
 * itself as idle (i915_gem_active.request == NULL). The owner
 * can then perform any action, such as delayed freeing of an active
 * resource including itself.
 */

struct i915_gem_active;

typedef void (*i915_gem_retire_fn)(struct i915_gem_active *,
				   struct drm_i915_gem_request *);

struct i915_gem_active {
	struct drm_i915_gem_request __rcu *request;
	struct list_head link;
	i915_gem_retire_fn retire;
};

void i915_gem_retire_noop(struct i915_gem_active *,
			  struct drm_i915_gem_request *request);

/**
 * init_request_active - prepares the activity tracker for use
 * @active - the active tracker
 * @retire - a callback when the tracker is retired (becomes idle),
 *           can be NULL
 *
 * init_request_active() prepares the embedded @active struct for use as
 * an activity tracker, that is for tracking the last known active request
 * associated with it. When the last request becomes idle, when it is retired
 * after completion, the optional callback @retire is invoked.
 */
static inline void
init_request_active(struct i915_gem_active *active,
		    i915_gem_retire_fn retire)
{
	INIT_LIST_HEAD(&active->link);
	active->retire = retire ?: i915_gem_retire_noop;
}

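/* Illustrative sketch (not part of the original header): an object that
 * wants to track its last GPU write might embed a tracker and initialise it
 * once at creation. The struct, field and callback names are hypothetical:
 *
 *	struct hypothetical_obj {
 *		struct i915_gem_active last_write;
 *	};
 *
 *	static void obj_retire_write(struct i915_gem_active *active,
 *				     struct drm_i915_gem_request *request)
 *	{
 *		(the last write is complete, e.g. clear a dirty flag)
 *	}
 *
 *	init_request_active(&obj->last_write, obj_retire_write);
 */
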
/**
 * i915_gem_active_set - updates the tracker to watch the current request
 * @active - the active tracker
 * @request - the request to watch
 *
 * i915_gem_active_set() watches the given @request for completion. Whilst
 * that @request is busy, the @active reports busy. When that @request is
 * retired, the @active tracker is updated to report idle.
 */
static inline void
i915_gem_active_set(struct i915_gem_active *active,
		    struct drm_i915_gem_request *request)
{
	list_move(&active->link, &request->active_list);
	rcu_assign_pointer(active->request, request);
}

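/* Illustrative sketch (not part of the original header): continuing the
 * hypothetical example above, each new GPU write then advances the tracker
 * under struct_mutex, typically from a move-to-active path:
 *
 *	i915_gem_active_set(&obj->last_write, rq);
 */
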
/**
 * i915_gem_active_set_retire_fn - updates the retirement callback
 * @active - the active tracker
 * @fn - the routine called when the request is retired
 * @mutex - struct_mutex used to guard retirements
 *
 * i915_gem_active_set_retire_fn() updates the function pointer that
 * is called when the final request associated with the @active tracker
 * is retired.
 */
static inline void
i915_gem_active_set_retire_fn(struct i915_gem_active *active,
			      i915_gem_retire_fn fn,
			      struct mutex *mutex)
{
	lockdep_assert_held(mutex);
	active->retire = fn ?: i915_gem_retire_noop;
}

static inline struct drm_i915_gem_request *
__i915_gem_active_peek(const struct i915_gem_active *active)
{
	/* Inside the error capture (running with the driver in an unknown
	 * state), we want to bend the rules slightly (a lot).
	 *
	 * Work is in progress to make it safer, in the meantime this keeps
	 * the known issue from spamming the logs.
	 */
	return rcu_dereference_protected(active->request, 1);
}

/**
 * i915_gem_active_raw - return the active request
 * @active - the active tracker
 *
 * i915_gem_active_raw() returns the current request being tracked, or NULL.
 * It does not obtain a reference on the request for the caller, so the caller
 * must hold struct_mutex.
 */
static inline struct drm_i915_gem_request *
i915_gem_active_raw(const struct i915_gem_active *active, struct mutex *mutex)
{
	return rcu_dereference_protected(active->request,
					 lockdep_is_held(mutex));
}

/**
 * i915_gem_active_peek - report the active request being monitored
 * @active - the active tracker
 *
 * i915_gem_active_peek() returns the current request being tracked if
 * still active, or NULL. It does not obtain a reference on the request
 * for the caller, so the caller must hold struct_mutex.
 */
static inline struct drm_i915_gem_request *
i915_gem_active_peek(const struct i915_gem_active *active, struct mutex *mutex)
{
	struct drm_i915_gem_request *request;

	request = i915_gem_active_raw(active, mutex);
	if (!request || i915_gem_request_completed(request))
		return NULL;

	return request;
}

/**
 * i915_gem_active_get - return a reference to the active request
 * @active - the active tracker
 *
 * i915_gem_active_get() returns a reference to the active request, or NULL
 * if the active tracker is idle. The caller must hold struct_mutex.
 */
static inline struct drm_i915_gem_request *
i915_gem_active_get(const struct i915_gem_active *active, struct mutex *mutex)
{
	return i915_gem_request_get(i915_gem_active_peek(active, mutex));
}

/**
 * __i915_gem_active_get_rcu - return a reference to the active request
 * @active - the active tracker
 *
 * __i915_gem_active_get_rcu() returns a reference to the active request,
 * or NULL if the active tracker is idle. The caller must hold the RCU read
 * lock, but the returned pointer is safe to use outside of RCU.
 */
static inline struct drm_i915_gem_request *
__i915_gem_active_get_rcu(const struct i915_gem_active *active)
{
	/* Performing a lockless retrieval of the active request is super
	 * tricky. SLAB_DESTROY_BY_RCU merely guarantees that the backing
	 * slab of request objects will not be freed whilst we hold the
	 * RCU read lock. It does not guarantee that the request itself
	 * will not be freed and then *reused*. Viz,
	 *
	 *	Thread A			Thread B
	 *
	 *	req = active.request
	 *					retire(req) -> free(req);
	 *					(req is now first on the slab freelist)
	 *					active.request = NULL
	 *
	 *					req = new submission on a new object
	 *	ref(req)
	 *
	 * To prevent the request from being reused whilst the caller
	 * uses it, we take a reference like normal. Whilst acquiring
	 * the reference we check that it is not in a destroyed state
	 * (refcnt == 0). That prevents the request being reallocated
	 * whilst the caller holds on to it. To check that the request
	 * was not reallocated as we acquired the reference we have to
	 * check that our request remains the active request across
	 * the lookup, in the same manner as a seqlock. The visibility
	 * of the pointer versus the reference counting is controlled
	 * by using RCU barriers (rcu_dereference and rcu_assign_pointer).
	 *
	 * In the middle of all that, we inspect whether the request is
	 * complete. Retiring is lazy so the request may be completed long
	 * before the active tracker is updated. Querying whether the
	 * request is complete is far cheaper (as it involves no locked
	 * instructions setting cachelines to exclusive) than acquiring
	 * the reference, so we do it first. The RCU read lock ensures the
	 * pointer dereference is valid, but does not ensure that the
	 * seqno nor HWS is the right one! However, if the request was
	 * reallocated, that means the active tracker's request was complete.
	 * If the new request is also complete, then both are and we can
	 * just report the active tracker is idle. If the new request is
	 * incomplete, then we acquire a reference on it and check that
	 * it remained the active request.
	 *
	 * It is then imperative that we do not zero the request on
	 * reallocation, so that we can chase the dangling pointers!
	 * See i915_gem_request_alloc().
	 */
	struct drm_i915_gem_request *request;

	do {
		request = rcu_dereference(active->request);
		if (!request || i915_gem_request_completed(request))
			return NULL;

		/* An especially silly compiler could decide to recompute the
		 * result of i915_gem_request_completed, more specifically
		 * re-emit the load for request->fence.seqno. A race would catch
		 * a later seqno value, which could flip the result from true to
		 * false. Which means part of the instructions below might not
		 * be executed, while later on instructions are executed. Due to
		 * barriers within the refcounting the inconsistency can't reach
		 * past the call to i915_gem_request_get_rcu, but not executing
		 * that while still executing i915_gem_request_put() creates
		 * havoc enough. Prevent this with a compiler barrier.
		 */
		barrier();

		request = i915_gem_request_get_rcu(request);

		/* What stops the following rcu_access_pointer() from occurring
		 * before the above i915_gem_request_get_rcu()? If we were
		 * to read the value before pausing to get the reference to
		 * the request, we may not notice a change in the active
		 * tracker.
		 *
		 * The rcu_access_pointer() is a mere compiler barrier, which
		 * means both the CPU and compiler are free to perform the
		 * memory read without constraint. The compiler only has to
		 * ensure that any operations after the rcu_access_pointer()
		 * occur afterwards in program order. This means the read may
		 * be performed earlier by an out-of-order CPU, or adventurous
		 * compiler.
		 *
		 * The atomic operation at the heart of
		 * i915_gem_request_get_rcu(), see dma_fence_get_rcu(), is
		 * atomic_inc_not_zero() which is only a full memory barrier
		 * when successful. That is, if i915_gem_request_get_rcu()
		 * returns the request (and so with the reference counted
		 * incremented) then the following read for rcu_access_pointer()
		 * must occur after the atomic operation and so confirm
		 * that this request is the one currently being tracked.
		 *
		 * The corresponding write barrier is part of
		 * rcu_assign_pointer().
		 */
		if (!request || request == rcu_access_pointer(active->request))
			return rcu_pointer_handoff(request);

		i915_gem_request_put(request);
	} while (1);
}

/**
 * i915_gem_active_get_unlocked - return a reference to the active request
 * @active - the active tracker
 *
 * i915_gem_active_get_unlocked() returns a reference to the active request,
 * or NULL if the active tracker is idle. The reference is obtained under RCU,
 * so no locking is required by the caller.
 *
 * The reference should be freed with i915_gem_request_put().
 */
static inline struct drm_i915_gem_request *
i915_gem_active_get_unlocked(const struct i915_gem_active *active)
{
	struct drm_i915_gem_request *request;

	rcu_read_lock();
	request = __i915_gem_active_get_rcu(active);
	rcu_read_unlock();

	return request;
}

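/* Illustrative sketch (not part of the original header): a lockless busy
 * check might look like
 *
 *	struct drm_i915_gem_request *rq;
 *
 *	rq = i915_gem_active_get_unlocked(&obj->last_write);
 *	if (rq) {
 *		(inspect or wait upon rq without holding struct_mutex)
 *		i915_gem_request_put(rq);
 *	}
 *
 * where obj->last_write is the hypothetical embedded tracker from the
 * earlier init_request_active() example.
 */
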
/**
 * i915_gem_active_isset - report whether the active tracker is assigned
 * @active - the active tracker
 *
 * i915_gem_active_isset() returns true if the active tracker is currently
 * assigned to a request. Due to the lazy retiring, that request may be idle
 * and this may report stale information.
 */
static inline bool
i915_gem_active_isset(const struct i915_gem_active *active)
{
	return rcu_access_pointer(active->request);
}

/**
 * i915_gem_active_wait - waits until the request is completed
 * @active - the active request on which to wait
 * @flags - how to wait
 *
 * i915_gem_active_wait() waits until the request is completed before
 * returning, without requiring any locks to be held. Note that it does not
 * retire any requests before returning.
 *
 * This function relies on RCU in order to acquire the reference to the active
 * request without holding any locks. See __i915_gem_active_get_rcu() for the
 * gory details on how that is managed. Once the reference is acquired, we
 * can then wait upon the request, and afterwards release our reference,
 * free of any locking.
 *
 * This function wraps i915_wait_request(), see it for the full details on
 * the arguments.
 *
 * Returns 0 if successful, or a negative error code.
 */
static inline int
i915_gem_active_wait(const struct i915_gem_active *active, unsigned int flags)
{
	struct drm_i915_gem_request *request;
	long ret = 0;

	request = i915_gem_active_get_unlocked(active);
	if (request) {
		ret = i915_wait_request(request, flags, MAX_SCHEDULE_TIMEOUT);
		i915_gem_request_put(request);
	}

	return ret < 0 ? ret : 0;
}

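/* Illustrative usage (not part of the original header):
 *
 *	err = i915_gem_active_wait(&obj->last_write, I915_WAIT_INTERRUPTIBLE);
 *	if (err)
 *		return err;
 *
 * obj->last_write being the hypothetical tracker from the earlier examples.
 */
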
/**
 * i915_gem_active_retire - waits until the request is retired
 * @active - the active request on which to wait
 * @mutex - struct_mutex used to guard retirements
 *
 * i915_gem_active_retire() waits until the request is completed,
 * and then ensures that at least the retirement handler for this
 * @active tracker is called before returning. If the @active
 * tracker is idle, the function returns immediately.
 */
static inline int __must_check
i915_gem_active_retire(struct i915_gem_active *active,
		       struct mutex *mutex)
{
	struct drm_i915_gem_request *request;
	long ret;

	request = i915_gem_active_raw(active, mutex);
	if (!request)
		return 0;

	ret = i915_wait_request(request,
				I915_WAIT_INTERRUPTIBLE | I915_WAIT_LOCKED,
				MAX_SCHEDULE_TIMEOUT);
	if (ret < 0)
		return ret;

	list_del_init(&active->link);
	RCU_INIT_POINTER(active->request, NULL);

	active->retire(active, request);

	return 0;
}

#define for_each_active(mask, idx) \
	for (; mask ? idx = ffs(mask) - 1, 1 : 0; mask &= ~BIT(idx))

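/* Illustrative sketch (not part of the original header): for_each_active()
 * consumes @mask as it iterates, so pass a copy. Assuming a hypothetical
 * object with a bitmask of busy engines and per-engine trackers:
 *
 *	unsigned int mask = obj->active_mask;
 *	int idx;
 *
 *	for_each_active(mask, idx) {
 *		err = i915_gem_active_retire(&obj->last_read[idx], mutex);
 *		if (err)
 *			return err;
 *	}
 */
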
#endif /* I915_GEM_REQUEST_H */