drivers/gpu/drm/i915/gt/intel_lrc.c

   1 /*
   2  * Copyright © 2014 Intel Corporation
   3  *
   4  * Permission is hereby granted, free of charge, to any person obtaining a
   5  * copy of this software and associated documentation files (the "Software"),
   6  * to deal in the Software without restriction, including without limitation
   7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   8  * and/or sell copies of the Software, and to permit persons to whom the
   9  * Software is furnished to do so, subject to the following conditions:
  10  *
  11  * The above copyright notice and this permission notice (including the next
  12  * paragraph) shall be included in all copies or substantial portions of the
  13  * Software.
  14  *
  15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  21  * IN THE SOFTWARE.
  22  *
  23  * Authors:
  24  *    Ben Widawsky <ben@bwidawsk.net>
  25  *    Michel Thierry <michel.thierry@intel.com>
  26  *    Thomas Daniel <thomas.daniel@intel.com>
  27  *    Oscar Mateo <oscar.mateo@intel.com>
  28  *
  29  */
  30
  31 /**
  32  * DOC: Logical Rings, Logical Ring Contexts and Execlists
  33  *
  34  * Motivation:
  35  * GEN8 brings an expansion of the HW contexts: "Logical Ring Contexts".
  36  * These expanded contexts enable a number of new abilities, especially
  37  * "Execlists" (also implemented in this file).
  38  *
  39  * One of the main differences with the legacy HW contexts is that logical
  40  * ring contexts incorporate many more things to the context's state, like
  41  * PDPs or ringbuffer control registers:
  42  *
  43  * The reason why PDPs are included in the context is straightforward: as
  44  * PPGTTs (per-process GTTs) are actually per-context, having the PDPs
  45  * contained there mean you don't need to do a ppgtt->switch_mm yourself,
  46  * instead, the GPU will do it for you on the context switch.
  47  *
  48  * But, what about the ringbuffer control registers (head, tail, etc..)?
  49  * shouldn't we just need a set of those per engine command streamer? This is
  50  * where the name "Logical Rings" starts to make sense: by virtualizing the
  51  * rings, the engine cs shifts to a new "ring buffer" with every context
  52  * switch. When you want to submit a workload to the GPU you: A) choose your
  53  * context, B) find its appropriate virtualized ring, C) write commands to it
  54  * and then, finally, D) tell the GPU to switch to that context.
  55  *
  56  * Instead of the legacy MI_SET_CONTEXT, the way you tell the GPU to switch
  57  * to a contexts is via a context execution list, ergo "Execlists".
  58  *
  59  * LRC implementation:
  60  * Regarding the creation of contexts, we have:
  61  *
  62  * - One global default context.
  63  * - One local default context for each opened fd.
  64  * - One local extra context for each context create ioctl call.
  65  *
  66  * Now that ringbuffers belong per-context (and not per-engine, like before)
  67  * and that contexts are uniquely tied to a given engine (and not reusable,
  68  * like before) we need:
  69  *
  70  * - One ringbuffer per-engine inside each context.
  71  * - One backing object per-engine inside each context.
  72  *
  73  * The global default context starts its life with these new objects fully
  74  * allocated and populated. The local default context for each opened fd is
  75  * more complex, because we don't know at creation time which engine is going
  76  * to use them. To handle this, we have implemented a deferred creation of LR
  77  * contexts:
  78  *
  79  * The local context starts its life as a hollow or blank holder, that only
  80  * gets populated for a given engine once we receive an execbuffer. If later
  81  * on we receive another execbuffer ioctl for the same context but a different
  82  * engine, we allocate/populate a new ringbuffer and context backing object and
  83  * so on.
  84  *
  85  * Finally, regarding local contexts created using the ioctl call: as they are
  86  * only allowed with the render ring, we can allocate & populate them right
  87  * away (no need to defer anything, at least for now).
  88  *
  89  * Execlists implementation:
  90  * Execlists are the new method by which, on gen8+ hardware, workloads are
  91  * submitted for execution (as opposed to the legacy, ringbuffer-based, method).
  92  * This method works as follows:
  93  *
  94  * When a request is committed, its commands (the BB start and any leading or
  95  * trailing commands, like the seqno breadcrumbs) are placed in the ringbuffer
  96  * for the appropriate context. The tail pointer in the hardware context is not
  97  * updated at this time, but instead, kept by the driver in the ringbuffer
  98  * structure. A structure representing this request is added to a request queue
  99  * for the appropriate engine: this structure contains a copy of the context's
 100  * tail after the request was written to the ring buffer and a pointer to the
 101  * context itself.
 102  *
 103  * If the engine's request queue was empty before the request was added, the
 104  * queue is processed immediately. Otherwise the queue will be processed during
 105  * a context switch interrupt. In any case, elements on the queue will get sent
 106  * (in pairs) to the GPU's ExecLists Submit Port (ELSP, for short) with a
 107  * globally unique 20-bits submission ID.
 108  *
 109  * When execution of a request completes, the GPU updates the context status
 110  * buffer with a context complete event and generates a context switch interrupt.
 111  * During the interrupt handling, the driver examines the events in the buffer:
 112  * for each context complete event, if the announced ID matches that on the head
 113  * of the request queue, then that request is retired and removed from the queue.
 114  *
 115  * After processing, if any requests were retired and the queue is not empty
 116  * then a new execution list can be submitted. The two requests at the front of
 117  * the queue are next to be submitted but since a context may not occur twice in
 118  * an execution list, if subsequent requests have the same ID as the first then
 119  * the two requests must be combined. This is done simply by discarding requests
 120  * at the head of the queue until either only one requests is left (in which case
 121  * we use a NULL second context) or the first two requests have unique IDs.
 122  *
 123  * By always executing the first two requests in the queue the driver ensures
 124  * that the GPU is kept as busy as possible. In the case where a single context
 125  * completes but a second context is still executing, the request for this second
 126  * context will be at the head of the queue when we remove the first one. This
 127  * request will then be resubmitted along with a new request for a different context,
 128  * which will cause the hardware to continue executing the second request and queue
 129  * the new request (the GPU detects the condition of a context getting preempted
 130  * with the same context and optimizes the context switch flow by not doing
 131  * preemption, but just sampling the new tail pointer).
 132  *
 133  */
 134 #include <linux/interrupt.h>
 135
 136 #include "gem/i915_gem_context.h"
 137
 138 #include "i915_drv.h"
 139 #include "i915_perf.h"
 140 #include "i915_trace.h"
 141 #include "i915_vgpu.h"
 142 #include "intel_engine_pm.h"
 143 #include "intel_gt.h"
 144 #include "intel_gt_pm.h"
 145 #include "intel_lrc_reg.h"
 146 #include "intel_mocs.h"
 147 #include "intel_reset.h"
 148 #include "intel_workarounds.h"
 149
 150 #define RING_EXECLIST_QFULL             (1 << 0x2)
 151 #define RING_EXECLIST1_VALID            (1 << 0x3)
 152 #define RING_EXECLIST0_VALID            (1 << 0x4)
 153 #define RING_EXECLIST_ACTIVE_STATUS     (3 << 0xE)
 154 #define RING_EXECLIST1_ACTIVE           (1 << 0x11)
 155 #define RING_EXECLIST0_ACTIVE           (1 << 0x12)
 156
 157 #define GEN8_CTX_STATUS_IDLE_ACTIVE     (1 << 0)
 158 #define GEN8_CTX_STATUS_PREEMPTED       (1 << 1)
 159 #define GEN8_CTX_STATUS_ELEMENT_SWITCH  (1 << 2)
 160 #define GEN8_CTX_STATUS_ACTIVE_IDLE     (1 << 3)
 161 #define GEN8_CTX_STATUS_COMPLETE        (1 << 4)
 162 #define GEN8_CTX_STATUS_LITE_RESTORE    (1 << 15)
 163
 164 #define GEN8_CTX_STATUS_COMPLETED_MASK \
 165          (GEN8_CTX_STATUS_COMPLETE | GEN8_CTX_STATUS_PREEMPTED)
 166
 167 #define CTX_DESC_FORCE_RESTORE BIT_ULL(2)
 168
 169 #define GEN12_CTX_STATUS_SWITCHED_TO_NEW_QUEUE  (0x1) /* lower csb dword */
 170 #define GEN12_CTX_SWITCH_DETAIL(csb_dw) ((csb_dw) & 0xF) /* upper csb dword */
 171 #define GEN12_CSB_SW_CTX_ID_MASK                GENMASK(25, 15)
 172 #define GEN12_IDLE_CTX_ID               0x7FF
 173 #define GEN12_CSB_CTX_VALID(csb_dw) \
 174         (FIELD_GET(GEN12_CSB_SW_CTX_ID_MASK, csb_dw) != GEN12_IDLE_CTX_ID)
 175
 176 /* Typical size of the average request (2 pipecontrols and a MI_BB) */
 177 #define EXECLISTS_REQUEST_SIZE 64 /* bytes */
 178 #define WA_TAIL_DWORDS 2
 179 #define WA_TAIL_BYTES (sizeof(u32) * WA_TAIL_DWORDS)
 180
 181 struct virtual_engine {
 182         struct intel_engine_cs base;
 183         struct intel_context context;
 184
 185         /*
 186          * We allow only a single request through the virtual engine at a time
 187          * (each request in the timeline waits for the completion fence of
 188          * the previous before being submitted). By restricting ourselves to
 189          * only submitting a single request, each request is placed on to a
 190          * physical to maximise load spreading (by virtue of the late greedy
 191          * scheduling -- each real engine takes the next available request
 192          * upon idling).
 193          */
 194         struct i915_request *request;
 195
 196         /*
 197          * We keep a rbtree of available virtual engines inside each physical
 198          * engine, sorted by priority. Here we preallocate the nodes we need
 199          * for the virtual engine, indexed by physical_engine->id.
 200          */
 201         struct ve_node {
 202                 struct rb_node rb;
 203                 int prio;
 204         } nodes[I915_NUM_ENGINES];
 205
 206         /*
 207          * Keep track of bonded pairs -- restrictions upon on our selection
 208          * of physical engines any particular request may be submitted to.
 209          * If we receive a submit-fence from a master engine, we will only
 210          * use one of sibling_mask physical engines.
 211          */
 212         struct ve_bond {
 213                 const struct intel_engine_cs *master;
 214                 intel_engine_mask_t sibling_mask;
 215         } *bonds;
 216         unsigned int num_bonds;
 217
 218         /* And finally, which physical engines this virtual engine maps onto. */
 219         unsigned int num_siblings;
 220         struct intel_engine_cs *siblings[0];
 221 };
 222
 223 static struct virtual_engine *to_virtual_engine(struct intel_engine_cs *engine)
 224 {
 225         GEM_BUG_ON(!intel_engine_is_virtual(engine));
 226         return container_of(engine, struct virtual_engine, base);
 227 }
 228
 229 static int __execlists_context_alloc(struct intel_context *ce,
 230                                      struct intel_engine_cs *engine);
 231
 232 static void execlists_init_reg_state(u32 *reg_state,
 233                                      const struct intel_context *ce,
 234                                      const struct intel_engine_cs *engine,
 235                                      const struct intel_ring *ring,
 236                                      bool close);
 237
 238 static void __context_pin_acquire(struct intel_context *ce)
 239 {
 240         mutex_acquire(&ce->pin_mutex.dep_map, 2, 0, _RET_IP_);
 241 }
 242
 243 static void __context_pin_release(struct intel_context *ce)
 244 {
 245         mutex_release(&ce->pin_mutex.dep_map, 0, _RET_IP_);
 246 }
 247
 248 static void mark_eio(struct i915_request *rq)
 249 {
 250         if (i915_request_completed(rq))
 251                 return;
 252
 253         GEM_BUG_ON(i915_request_signaled(rq));
 254
 255         dma_fence_set_error(&rq->fence, -EIO);
 256         i915_request_mark_complete(rq);
 257 }
 258
 259 static inline u32 intel_hws_preempt_address(struct intel_engine_cs *engine)
 260 {
 261         return (i915_ggtt_offset(engine->status_page.vma) +
 262                 I915_GEM_HWS_PREEMPT_ADDR);
 263 }
 264
 265 static inline void
 266 ring_set_paused(const struct intel_engine_cs *engine, int state)
 267 {
 268         /*
 269          * We inspect HWS_PREEMPT with a semaphore inside
 270          * engine->emit_fini_breadcrumb. If the dword is true,
 271          * the ring is paused as the semaphore will busywait
 272          * until the dword is false.
 273          */
 274         engine->status_page.addr[I915_GEM_HWS_PREEMPT] = state;
 275         if (state)
 276                 wmb();
 277 }
 278
 279 static inline struct i915_priolist *to_priolist(struct rb_node *rb)
 280 {
 281         return rb_entry(rb, struct i915_priolist, node);
 282 }
 283
 284 static inline int rq_prio(const struct i915_request *rq)
 285 {
 286         return rq->sched.attr.priority;
 287 }
 288
 289 static int effective_prio(const struct i915_request *rq)
 290 {
 291         int prio = rq_prio(rq);
 292
 293         /*
 294          * If this request is special and must not be interrupted at any
 295          * cost, so be it. Note we are only checking the most recent request
 296          * in the context and so may be masking an earlier vip request. It
 297          * is hoped that under the conditions where nopreempt is used, this
 298          * will not matter (i.e. all requests to that context will be
 299          * nopreempt for as long as desired).
 300          */
 301         if (i915_request_has_nopreempt(rq))
 302                 prio = I915_PRIORITY_UNPREEMPTABLE;
 303
 304         /*
 305          * On unwinding the active request, we give it a priority bump
 306          * if it has completed waiting on any semaphore. If we know that
 307          * the request has already started, we can prevent an unwanted
 308          * preempt-to-idle cycle by taking that into account now.
 309          */
 310         if (__i915_request_has_started(rq))
 311                 prio |= I915_PRIORITY_NOSEMAPHORE;
 312
 313         /* Restrict mere WAIT boosts from triggering preemption */
 314         BUILD_BUG_ON(__NO_PREEMPTION & ~I915_PRIORITY_MASK); /* only internal */
 315         return prio | __NO_PREEMPTION;
 316 }
 317
 318 static int queue_prio(const struct intel_engine_execlists *execlists)
 319 {
 320         struct i915_priolist *p;
 321         struct rb_node *rb;
 322
 323         rb = rb_first_cached(&execlists->queue);
 324         if (!rb)
 325                 return INT_MIN;
 326
 327         /*
 328          * As the priolist[] are inverted, with the highest priority in [0],
 329          * we have to flip the index value to become priority.
 330          */
 331         p = to_priolist(rb);
 332         return ((p->priority + 1) << I915_USER_PRIORITY_SHIFT) - ffs(p->used);
 333 }
 334
 335 static inline bool need_preempt(const struct intel_engine_cs *engine,
 336                                 const struct i915_request *rq,
 337                                 struct rb_node *rb)
 338 {
 339         int last_prio;
 340
 341         if (!intel_engine_has_semaphores(engine))
 342                 return false;
 343
 344         /*
 345          * Check if the current priority hint merits a preemption attempt.
 346          *
 347          * We record the highest value priority we saw during rescheduling
 348          * prior to this dequeue, therefore we know that if it is strictly
 349          * less than the current tail of ESLP[0], we do not need to force
 350          * a preempt-to-idle cycle.
 351          *
 352          * However, the priority hint is a mere hint that we may need to
 353          * preempt. If that hint is stale or we may be trying to preempt
 354          * ourselves, ignore the request.
 355          */
 356         last_prio = effective_prio(rq);
 357         if (!i915_scheduler_need_preempt(engine->execlists.queue_priority_hint,
 358                                          last_prio))
 359                 return false;
 360
 361         /*
 362          * Check against the first request in ELSP[1], it will, thanks to the
 363          * power of PI, be the highest priority of that context.
 364          */
 365         if (!list_is_last(&rq->sched.link, &engine->active.requests) &&
 366             rq_prio(list_next_entry(rq, sched.link)) > last_prio)
 367                 return true;
 368
 369         if (rb) {
 370                 struct virtual_engine *ve =
 371                         rb_entry(rb, typeof(*ve), nodes[engine->id].rb);
 372                 bool preempt = false;
 373
 374                 if (engine == ve->siblings[0]) { /* only preempt one sibling */
 375                         struct i915_request *next;
 376
 377                         rcu_read_lock();
 378                         next = READ_ONCE(ve->request);
 379                         if (next)
 380                                 preempt = rq_prio(next) > last_prio;
 381                         rcu_read_unlock();
 382                 }
 383
 384                 if (preempt)
 385                         return preempt;
 386         }
 387
 388         /*
 389          * If the inflight context did not trigger the preemption, then maybe
 390          * it was the set of queued requests? Pick the highest priority in
 391          * the queue (the first active priolist) and see if it deserves to be
 392          * running instead of ELSP[0].
 393          *
 394          * The highest priority request in the queue can not be either
 395          * ELSP[0] or ELSP[1] as, thanks again to PI, if it was the same
 396          * context, it's priority would not exceed ELSP[0] aka last_prio.
 397          */
 398         return queue_prio(&engine->execlists) > last_prio;
 399 }
 400
 401 __maybe_unused static inline bool
 402 assert_priority_queue(const struct i915_request *prev,
 403                       const struct i915_request *next)
 404 {
 405         /*
 406          * Without preemption, the prev may refer to the still active element
 407          * which we refuse to let go.
 408          *
 409          * Even with preemption, there are times when we think it is better not
 410          * to preempt and leave an ostensibly lower priority request in flight.
 411          */
 412         if (i915_request_is_active(prev))
 413                 return true;
 414
 415         return rq_prio(prev) >= rq_prio(next);
 416 }
 417
 418 /*
 419  * The context descriptor encodes various attributes of a context,
 420  * including its GTT address and some flags. Because it's fairly
 421  * expensive to calculate, we'll just do it once and cache the result,
 422  * which remains valid until the context is unpinned.
 423  *
 424  * This is what a descriptor looks like, from LSB to MSB::
 425  *
 426  *      bits  0-11:    flags, GEN8_CTX_* (cached in ctx->desc_template)
 427  *      bits 12-31:    LRCA, GTT address of (the HWSP of) this context
 428  *      bits 32-52:    ctx ID, a globally unique tag (highest bit used by GuC)
 429  *      bits 53-54:    mbz, reserved for use by hardware
 430  *      bits 55-63:    group ID, currently unused and set to 0
 431  *
 432  * Starting from Gen11, the upper dword of the descriptor has a new format:
 433  *
 434  *      bits 32-36:    reserved
 435  *      bits 37-47:    SW context ID
 436  *      bits 48:53:    engine instance
 437  *      bit 54:        mbz, reserved for use by hardware
 438  *      bits 55-60:    SW counter
 439  *      bits 61-63:    engine class
 440  *
 441  * engine info, SW context ID and SW counter need to form a unique number
 442  * (Context ID) per lrc.
 443  */
 444 static u64
 445 lrc_descriptor(struct intel_context *ce, struct intel_engine_cs *engine)
 446 {
 447         u64 desc;
 448
 449         desc = INTEL_LEGACY_32B_CONTEXT;
 450         if (i915_vm_is_4lvl(ce->vm))
 451                 desc = INTEL_LEGACY_64B_CONTEXT;
 452         desc <<= GEN8_CTX_ADDRESSING_MODE_SHIFT;
 453
 454         desc |= GEN8_CTX_VALID | GEN8_CTX_PRIVILEGE;
 455         if (IS_GEN(engine->i915, 8))
 456                 desc |= GEN8_CTX_L3LLC_COHERENT;
 457
 458         desc |= i915_ggtt_offset(ce->state) + LRC_HEADER_PAGES * PAGE_SIZE;
 459                                                                 /* bits 12-31 */
 460         /*
 461          * The following 32bits are copied into the OA reports (dword 2).
 462          * Consider updating oa_get_render_ctx_id in i915_perf.c when changing
 463          * anything below.
 464          */
 465         if (INTEL_GEN(engine->i915) >= 11) {
 466                 desc |= (u64)engine->instance << GEN11_ENGINE_INSTANCE_SHIFT;
 467                                                                 /* bits 48-53 */
 468
 469                 desc |= (u64)engine->class << GEN11_ENGINE_CLASS_SHIFT;
 470                                                                 /* bits 61-63 */
 471         }
 472
 473         return desc;
 474 }
 475
 476 static u32 *set_offsets(u32 *regs,
 477                         const u8 *data,
 478                         const struct intel_engine_cs *engine)
 479 #define NOP(x) (BIT(7) | (x))
 480 #define LRI(count, flags) ((flags) << 6 | (count))
 481 #define POSTED BIT(0)
 482 #define REG(x) (((x) >> 2) | BUILD_BUG_ON_ZERO(x >= 0x200))
 483 #define REG16(x) \
 484         (((x) >> 9) | BIT(7) | BUILD_BUG_ON_ZERO(x >= 0x10000)), \
 485         (((x) >> 2) & 0x7f)
 486 #define END() 0
 487 {
 488         const u32 base = engine->mmio_base;
 489
 490         while (*data) {
 491                 u8 count, flags;
 492
 493                 if (*data & BIT(7)) { /* skip */
 494                         regs += *data++ & ~BIT(7);
 495                         continue;
 496                 }
 497
 498                 count = *data & 0x3f;
 499                 flags = *data >> 6;
 500                 data++;
 501
 502                 *regs = MI_LOAD_REGISTER_IMM(count);
 503                 if (flags & POSTED)
 504                         *regs |= MI_LRI_FORCE_POSTED;
 505                 if (INTEL_GEN(engine->i915) >= 11)
 506                         *regs |= MI_LRI_CS_MMIO;
 507                 regs++;
 508
 509                 GEM_BUG_ON(!count);
 510                 do {
 511                         u32 offset = 0;
 512                         u8 v;
 513
 514                         do {
 515                                 v = *data++;
 516                                 offset <<= 7;
 517                                 offset |= v & ~BIT(7);
 518                         } while (v & BIT(7));
 519
 520                         *regs = base + (offset << 2);
 521                         regs += 2;
 522                 } while (--count);
 523         }
 524
 525         return regs;
 526 }
 527
 528 static const u8 gen8_xcs_offsets[] = {
 529         NOP(1),
 530         LRI(11, 0),
 531         REG16(0x244),
 532         REG(0x034),
 533         REG(0x030),
 534         REG(0x038),
 535         REG(0x03c),
 536         REG(0x168),
 537         REG(0x140),
 538         REG(0x110),
 539         REG(0x11c),
 540         REG(0x114),
 541         REG(0x118),
 542
 543         NOP(9),
 544         LRI(9, 0),
 545         REG16(0x3a8),
 546         REG16(0x28c),
 547         REG16(0x288),
 548         REG16(0x284),
 549         REG16(0x280),
 550         REG16(0x27c),
 551         REG16(0x278),
 552         REG16(0x274),
 553         REG16(0x270),
 554
 555         NOP(13),
 556         LRI(2, 0),
 557         REG16(0x200),
 558         REG(0x028),
 559
 560         END(),
 561 };
 562
 563 static const u8 gen9_xcs_offsets[] = {
 564         NOP(1),
 565         LRI(14, POSTED),
 566         REG16(0x244),
 567         REG(0x034),
 568         REG(0x030),
 569         REG(0x038),
 570         REG(0x03c),
 571         REG(0x168),
 572         REG(0x140),
 573         REG(0x110),
 574         REG(0x11c),
 575         REG(0x114),
 576         REG(0x118),
 577         REG(0x1c0),
 578         REG(0x1c4),
 579         REG(0x1c8),
 580
 581         NOP(3),
 582         LRI(9, POSTED),
 583         REG16(0x3a8),
 584         REG16(0x28c),
 585         REG16(0x288),
 586         REG16(0x284),
 587         REG16(0x280),
 588         REG16(0x27c),
 589         REG16(0x278),
 590         REG16(0x274),
 591         REG16(0x270),
 592
 593         NOP(13),
 594         LRI(1, POSTED),
 595         REG16(0x200),
 596
 597         NOP(13),
 598         LRI(44, POSTED),
 599         REG(0x028),
 600         REG(0x09c),
 601         REG(0x0c0),
 602         REG(0x178),
 603         REG(0x17c),
 604         REG16(0x358),
 605         REG(0x170),
 606         REG(0x150),
 607         REG(0x154),
 608         REG(0x158),
 609         REG16(0x41c),
 610         REG16(0x600),
 611         REG16(0x604),
 612         REG16(0x608),
 613         REG16(0x60c),
 614         REG16(0x610),
 615         REG16(0x614),
 616         REG16(0x618),
 617         REG16(0x61c),
 618         REG16(0x620),
 619         REG16(0x624),
 620         REG16(0x628),
 621         REG16(0x62c),
 622         REG16(0x630),
 623         REG16(0x634),
 624         REG16(0x638),
 625         REG16(0x63c),
 626         REG16(0x640),
 627         REG16(0x644),
 628         REG16(0x648),
 629         REG16(0x64c),
 630         REG16(0x650),
 631         REG16(0x654),
 632         REG16(0x658),
 633         REG16(0x65c),
 634         REG16(0x660),
 635         REG16(0x664),
 636         REG16(0x668),
 637         REG16(0x66c),
 638         REG16(0x670),
 639         REG16(0x674),
 640         REG16(0x678),
 641         REG16(0x67c),
 642         REG(0x068),
 643
 644         END(),
 645 };
 646
 647 static const u8 gen12_xcs_offsets[] = {
 648         NOP(1),
 649         LRI(13, POSTED),
 650         REG16(0x244),
 651         REG(0x034),
 652         REG(0x030),
 653         REG(0x038),
 654         REG(0x03c),
 655         REG(0x168),
 656         REG(0x140),
 657         REG(0x110),
 658         REG(0x1c0),
 659         REG(0x1c4),
 660         REG(0x1c8),
 661         REG(0x180),
 662         REG16(0x2b4),
 663
 664         NOP(5),
 665         LRI(9, POSTED),
 666         REG16(0x3a8),
 667         REG16(0x28c),
 668         REG16(0x288),
 669         REG16(0x284),
 670         REG16(0x280),
 671         REG16(0x27c),
 672         REG16(0x278),
 673         REG16(0x274),
 674         REG16(0x270),
 675
 676         END(),
 677 };
 678
 679 static const u8 gen8_rcs_offsets[] = {
 680         NOP(1),
 681         LRI(14, POSTED),
 682         REG16(0x244),
 683         REG(0x034),
 684         REG(0x030),
 685         REG(0x038),
 686         REG(0x03c),
 687         REG(0x168),
 688         REG(0x140),
 689         REG(0x110),
 690         REG(0x11c),
 691         REG(0x114),
 692         REG(0x118),
 693         REG(0x1c0),
 694         REG(0x1c4),
 695         REG(0x1c8),
 696
 697         NOP(3),
 698         LRI(9, POSTED),
 699         REG16(0x3a8),
 700         REG16(0x28c),
 701         REG16(0x288),
 702         REG16(0x284),
 703         REG16(0x280),
 704         REG16(0x27c),
 705         REG16(0x278),
 706         REG16(0x274),
 707         REG16(0x270),
 708
 709         NOP(13),
 710         LRI(1, 0),
 711         REG(0x0c8),
 712
 713         END(),
 714 };
 715
 716 static const u8 gen11_rcs_offsets[] = {
 717         NOP(1),
 718         LRI(15, POSTED),
 719         REG16(0x244),
 720         REG(0x034),
 721         REG(0x030),
 722         REG(0x038),
 723         REG(0x03c),
 724         REG(0x168),
 725         REG(0x140),
 726         REG(0x110),
 727         REG(0x11c),
 728         REG(0x114),
 729         REG(0x118),
 730         REG(0x1c0),
 731         REG(0x1c4),
 732         REG(0x1c8),
 733         REG(0x180),
 734
 735         NOP(1),
 736         LRI(9, POSTED),
 737         REG16(0x3a8),
 738         REG16(0x28c),
 739         REG16(0x288),
 740         REG16(0x284),
 741         REG16(0x280),
 742         REG16(0x27c),
 743         REG16(0x278),
 744         REG16(0x274),
 745         REG16(0x270),
 746
 747         LRI(1, POSTED),
 748         REG(0x1b0),
 749
 750         NOP(10),
 751         LRI(1, 0),
 752         REG(0x0c8),
 753
 754         END(),
 755 };
 756
 757 static const u8 gen12_rcs_offsets[] = {
 758         NOP(1),
 759         LRI(13, POSTED),
 760         REG16(0x244),
 761         REG(0x034),
 762         REG(0x030),
 763         REG(0x038),
 764         REG(0x03c),
 765         REG(0x168),
 766         REG(0x140),
 767         REG(0x110),
 768         REG(0x1c0),
 769         REG(0x1c4),
 770         REG(0x1c8),
 771         REG(0x180),
 772         REG16(0x2b4),
 773
 774         NOP(5),
 775         LRI(9, POSTED),
 776         REG16(0x3a8),
 777         REG16(0x28c),
 778         REG16(0x288),
 779         REG16(0x284),
 780         REG16(0x280),
 781         REG16(0x27c),
 782         REG16(0x278),
 783         REG16(0x274),
 784         REG16(0x270),
 785
 786         LRI(3, POSTED),
 787         REG(0x1b0),
 788         REG16(0x5a8),
 789         REG16(0x5ac),
 790
 791         NOP(6),
 792         LRI(1, 0),
 793         REG(0x0c8),
 794
 795         END(),
 796 };
 797
 798 #undef END
 799 #undef REG16
 800 #undef REG
 801 #undef LRI
 802 #undef NOP
 803
 804 static const u8 *reg_offsets(const struct intel_engine_cs *engine)
 805 {
 806         /*
 807          * The gen12+ lists only have the registers we program in the basic
 808          * default state. We rely on the context image using relative
 809          * addressing to automatic fixup the register state between the
 810          * physical engines for virtual engine.
 811          */
 812         GEM_BUG_ON(INTEL_GEN(engine->i915) >= 12 &&
 813                    !intel_engine_has_relative_mmio(engine));
 814
 815         if (engine->class == RENDER_CLASS) {
 816                 if (INTEL_GEN(engine->i915) >= 12)
 817                         return gen12_rcs_offsets;
 818                 else if (INTEL_GEN(engine->i915) >= 11)
 819                         return gen11_rcs_offsets;
 820                 else
 821                         return gen8_rcs_offsets;
 822         } else {
 823                 if (INTEL_GEN(engine->i915) >= 12)
 824                         return gen12_xcs_offsets;
 825                 else if (INTEL_GEN(engine->i915) >= 9)
 826                         return gen9_xcs_offsets;
 827                 else
 828                         return gen8_xcs_offsets;
 829         }
 830 }
 831
 832 static void unwind_wa_tail(struct i915_request *rq)
 833 {
 834         rq->tail = intel_ring_wrap(rq->ring, rq->wa_tail - WA_TAIL_BYTES);
 835         assert_ring_tail_valid(rq->ring, rq->tail);
 836 }
 837
 838 static struct i915_request *
 839 __unwind_incomplete_requests(struct intel_engine_cs *engine)
 840 {
 841         struct i915_request *rq, *rn, *active = NULL;
 842         struct list_head *uninitialized_var(pl);
 843         int prio = I915_PRIORITY_INVALID;
 844
 845         lockdep_assert_held(&engine->active.lock);
 846
 847         list_for_each_entry_safe_reverse(rq, rn,
 848                                          &engine->active.requests,
 849                                          sched.link) {
 850
 851                 if (i915_request_completed(rq))
 852                         continue; /* XXX */
 853
 854                 __i915_request_unsubmit(rq);
 855                 unwind_wa_tail(rq);
 856
 857                 /*
 858                  * Push the request back into the queue for later resubmission.
 859                  * If this request is not native to this physical engine (i.e.
 860                  * it came from a virtual source), push it back onto the virtual
 861                  * engine so that it can be moved across onto another physical
 862                  * engine as load dictates.
 863                  */
 864                 if (likely(rq->execution_mask == engine->mask)) {
 865                         GEM_BUG_ON(rq_prio(rq) == I915_PRIORITY_INVALID);
 866                         if (rq_prio(rq) != prio) {
 867                                 prio = rq_prio(rq);
 868                                 pl = i915_sched_lookup_priolist(engine, prio);
 869                         }
 870                         GEM_BUG_ON(RB_EMPTY_ROOT(&engine->execlists.queue.rb_root));
 871
 872                         list_move(&rq->sched.link, pl);
 873                         active = rq;
 874                 } else {
 875                         struct intel_engine_cs *owner = rq->hw_context->engine;
 876
 877                         /*
 878                          * Decouple the virtual breadcrumb before moving it
 879                          * back to the virtual engine -- we don't want the
 880                          * request to complete in the background and try
 881                          * and cancel the breadcrumb on the virtual engine
 882                          * (instead of the old engine where it is linked)!
 883                          */
 884                         if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT,
 885                                      &rq->fence.flags)) {
 886                                 spin_lock_nested(&rq->lock,
 887                                                  SINGLE_DEPTH_NESTING);
 888                                 i915_request_cancel_breadcrumb(rq);
 889                                 spin_unlock(&rq->lock);
 890                         }
 891                         rq->engine = owner;
 892                         owner->submit_request(rq);
 893                         active = NULL;
 894                 }
 895         }
 896
 897         return active;
 898 }
 899
 900 struct i915_request *
 901 execlists_unwind_incomplete_requests(struct intel_engine_execlists *execlists)
 902 {
 903         struct intel_engine_cs *engine =
 904                 container_of(execlists, typeof(*engine), execlists);
 905
 906         return __unwind_incomplete_requests(engine);
 907 }
 908
 909 static inline void
 910 execlists_context_status_change(struct i915_request *rq, unsigned long status)
 911 {
 912         /*
 913          * Only used when GVT-g is enabled now. When GVT-g is disabled,
 914          * The compiler should eliminate this function as dead-code.
 915          */
 916         if (!IS_ENABLED(CONFIG_DRM_I915_GVT))
 917                 return;
 918
 919         atomic_notifier_call_chain(&rq->engine->context_status_notifier,
 920                                    status, rq);
 921 }
 922
 923 static inline struct intel_engine_cs *
 924 __execlists_schedule_in(struct i915_request *rq)
 925 {
 926         struct intel_engine_cs * const engine = rq->engine;
 927         struct intel_context * const ce = rq->hw_context;
 928
 929         intel_context_get(ce);
 930
 931         if (ce->tag) {
 932                 /* Use a fixed tag for OA and friends */
 933                 ce->lrc_desc |= (u64)ce->tag << 32;
 934         } else {
 935                 /* We don't need a strict matching tag, just different values */
 936                 ce->lrc_desc &= ~GENMASK_ULL(47, 37);
 937                 ce->lrc_desc |=
 938                         (u64)(engine->context_tag++ % NUM_CONTEXT_TAG) <<
 939                         GEN11_SW_CTX_ID_SHIFT;
 940                 BUILD_BUG_ON(NUM_CONTEXT_TAG > GEN12_MAX_CONTEXT_HW_ID);
 941         }
 942
 943         intel_gt_pm_get(engine->gt);
 944         execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_IN);
 945         intel_engine_context_in(engine);
 946
 947         return engine;
 948 }
 949
 950 static inline struct i915_request *
 951 execlists_schedule_in(struct i915_request *rq, int idx)
 952 {
 953         struct intel_context * const ce = rq->hw_context;
 954         struct intel_engine_cs *old;
 955
 956         GEM_BUG_ON(!intel_engine_pm_is_awake(rq->engine));
 957         trace_i915_request_in(rq, idx);
 958
 959         old = READ_ONCE(ce->inflight);
 960         do {
 961                 if (!old) {
 962                         WRITE_ONCE(ce->inflight, __execlists_schedule_in(rq));
 963                         break;
 964                 }
 965         } while (!try_cmpxchg(&ce->inflight, &old, ptr_inc(old)));
 966
 967         GEM_BUG_ON(intel_context_inflight(ce) != rq->engine);
 968         return i915_request_get(rq);
 969 }
 970
 971 static void kick_siblings(struct i915_request *rq, struct intel_context *ce)
 972 {
 973         struct virtual_engine *ve = container_of(ce, typeof(*ve), context);
 974         struct i915_request *next = READ_ONCE(ve->request);
 975
 976         if (next && next->execution_mask & ~rq->execution_mask)
 977                 tasklet_schedule(&ve->base.execlists.tasklet);
 978 }
 979
 980 static inline void
 981 __execlists_schedule_out(struct i915_request *rq,
 982                          struct intel_engine_cs * const engine)
 983 {
 984         struct intel_context * const ce = rq->hw_context;
 985
 986         intel_engine_context_out(engine);
 987         execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_OUT);
 988         intel_gt_pm_put(engine->gt);
 989
 990         /*
 991          * If this is part of a virtual engine, its next request may
 992          * have been blocked waiting for access to the active context.
 993          * We have to kick all the siblings again in case we need to
 994          * switch (e.g. the next request is not runnable on this
 995          * engine). Hopefully, we will already have submitted the next
 996          * request before the tasklet runs and do not need to rebuild
 997          * each virtual tree and kick everyone again.
 998          */
 999         if (ce->engine != engine)
1000                 kick_siblings(rq, ce);
1001
1002         intel_context_put(ce);
1003 }
1004
1005 static inline void
1006 execlists_schedule_out(struct i915_request *rq)
1007 {
1008         struct intel_context * const ce = rq->hw_context;
1009         struct intel_engine_cs *cur, *old;
1010
1011         trace_i915_request_out(rq);
1012
1013         old = READ_ONCE(ce->inflight);
1014         do
1015                 cur = ptr_unmask_bits(old, 2) ? ptr_dec(old) : NULL;
1016         while (!try_cmpxchg(&ce->inflight, &old, cur));
1017         if (!cur)
1018                 __execlists_schedule_out(rq, old);
1019
1020         i915_request_put(rq);
1021 }
1022
1023 static u64 execlists_update_context(const struct i915_request *rq)
1024 {
1025         struct intel_context *ce = rq->hw_context;
1026         u64 desc;
1027
1028         ce->lrc_reg_state[CTX_RING_TAIL] =
1029                 intel_ring_set_tail(rq->ring, rq->tail);
1030
1031         /*
1032          * Make sure the context image is complete before we submit it to HW.
1033          *
1034          * Ostensibly, writes (including the WCB) should be flushed prior to
1035          * an uncached write such as our mmio register access, the empirical
1036          * evidence (esp. on Braswell) suggests that the WC write into memory
1037          * may not be visible to the HW prior to the completion of the UC
1038          * register write and that we may begin execution from the context
1039          * before its image is complete leading to invalid PD chasing.
1040          *
1041          * Furthermore, Braswell, at least, wants a full mb to be sure that
1042          * the writes are coherent in memory (visible to the GPU) prior to
1043          * execution, and not just visible to other CPUs (as is the result of
1044          * wmb).
1045          */
1046         mb();
1047
1048         desc = ce->lrc_desc;
1049         ce->lrc_desc &= ~CTX_DESC_FORCE_RESTORE;
1050
1051         return desc;
1052 }
1053
1054 static inline void write_desc(struct intel_engine_execlists *execlists, u64 desc, u32 port)
1055 {
1056         if (execlists->ctrl_reg) {
1057                 writel(lower_32_bits(desc), execlists->submit_reg + port * 2);
1058                 writel(upper_32_bits(desc), execlists->submit_reg + port * 2 + 1);
1059         } else {
1060                 writel(upper_32_bits(desc), execlists->submit_reg);
1061                 writel(lower_32_bits(desc), execlists->submit_reg);
1062         }
1063 }
1064
1065 static __maybe_unused void
1066 trace_ports(const struct intel_engine_execlists *execlists,
1067             const char *msg,
1068             struct i915_request * const *ports)
1069 {
1070         const struct intel_engine_cs *engine =
1071                 container_of(execlists, typeof(*engine), execlists);
1072
1073         if (!ports[0])
1074                 return;
1075
1076         GEM_TRACE("%s: %s { %llx:%lld%s, %llx:%lld }\n",
1077                   engine->name, msg,
1078                   ports[0]->fence.context,
1079                   ports[0]->fence.seqno,
1080                   i915_request_completed(ports[0]) ? "!" :
1081                   i915_request_started(ports[0]) ? "*" :
1082                   "",
1083                   ports[1] ? ports[1]->fence.context : 0,
1084                   ports[1] ? ports[1]->fence.seqno : 0);
1085 }
1086
1087 static __maybe_unused bool
1088 assert_pending_valid(const struct intel_engine_execlists *execlists,
1089                      const char *msg)
1090 {
1091         struct i915_request * const *port, *rq;
1092         struct intel_context *ce = NULL;
1093
1094         trace_ports(execlists, msg, execlists->pending);
1095
1096         if (!execlists->pending[0]) {
1097                 GEM_TRACE_ERR("Nothing pending for promotion!\n");
1098                 return false;
1099         }
1100
1101         if (execlists->pending[execlists_num_ports(execlists)]) {
1102                 GEM_TRACE_ERR("Excess pending[%d] for promotion!\n",
1103                               execlists_num_ports(execlists));
1104                 return false;
1105         }
1106
1107         for (port = execlists->pending; (rq = *port); port++) {
1108                 if (ce == rq->hw_context) {
1109                         GEM_TRACE_ERR("Duplicate context in pending[%zd]\n",
1110                                       port - execlists->pending);
1111                         return false;
1112                 }
1113
1114                 ce = rq->hw_context;
1115                 if (i915_request_completed(rq))
1116                         continue;
1117
1118                 if (i915_active_is_idle(&ce->active)) {
1119                         GEM_TRACE_ERR("Inactive context in pending[%zd]\n",
1120                                       port - execlists->pending);
1121                         return false;
1122                 }
1123
1124                 if (!i915_vma_is_pinned(ce->state)) {
1125                         GEM_TRACE_ERR("Unpinned context in pending[%zd]\n",
1126                                       port - execlists->pending);
1127                         return false;
1128                 }
1129
1130                 if (!i915_vma_is_pinned(ce->ring->vma)) {
1131                         GEM_TRACE_ERR("Unpinned ringbuffer in pending[%zd]\n",
1132                                       port - execlists->pending);
1133                         return false;
1134                 }
1135         }
1136
1137         return ce;
1138 }
1139
1140 static void execlists_submit_ports(struct intel_engine_cs *engine)
1141 {
1142         struct intel_engine_execlists *execlists = &engine->execlists;
1143         unsigned int n;
1144
1145         GEM_BUG_ON(!assert_pending_valid(execlists, "submit"));
1146
1147         /*
1148          * We can skip acquiring intel_runtime_pm_get() here as it was taken
1149          * on our behalf by the request (see i915_gem_mark_busy()) and it will
1150          * not be relinquished until the device is idle (see
1151          * i915_gem_idle_work_handler()). As a precaution, we make sure
1152          * that all ELSP are drained i.e. we have processed the CSB,
1153          * before allowing ourselves to idle and calling intel_runtime_pm_put().
1154          */
1155         GEM_BUG_ON(!intel_engine_pm_is_awake(engine));
1156
1157         /*
1158          * ELSQ note: the submit queue is not cleared after being submitted
1159          * to the HW so we need to make sure we always clean it up. This is
1160          * currently ensured by the fact that we always write the same number
1161          * of elsq entries, keep this in mind before changing the loop below.
1162          */
1163         for (n = execlists_num_ports(execlists); n--; ) {
1164                 struct i915_request *rq = execlists->pending[n];
1165
1166                 write_desc(execlists,
1167                            rq ? execlists_update_context(rq) : 0,
1168                            n);
1169         }
1170
1171         /* we need to manually load the submit queue */
1172         if (execlists->ctrl_reg)
1173                 writel(EL_CTRL_LOAD, execlists->ctrl_reg);
1174 }
1175
1176 static bool ctx_single_port_submission(const struct intel_context *ce)
1177 {
1178         return (IS_ENABLED(CONFIG_DRM_I915_GVT) &&
1179                 i915_gem_context_force_single_submission(ce->gem_context));
1180 }
1181
1182 static bool can_merge_ctx(const struct intel_context *prev,
1183                           const struct intel_context *next)
1184 {
1185         if (prev != next)
1186                 return false;
1187
1188         if (ctx_single_port_submission(prev))
1189                 return false;
1190
1191         return true;
1192 }
1193
1194 static bool can_merge_rq(const struct i915_request *prev,
1195                          const struct i915_request *next)
1196 {
1197         GEM_BUG_ON(prev == next);
1198         GEM_BUG_ON(!assert_priority_queue(prev, next));
1199
1200         /*
1201          * We do not submit known completed requests. Therefore if the next
1202          * request is already completed, we can pretend to merge it in
1203          * with the previous context (and we will skip updating the ELSP
1204          * and tracking). Thus hopefully keeping the ELSP full with active
1205          * contexts, despite the best efforts of preempt-to-busy to confuse
1206          * us.
1207          */
1208         if (i915_request_completed(next))
1209                 return true;
1210
1211         if (unlikely((prev->flags ^ next->flags) &
1212                      (I915_REQUEST_NOPREEMPT | I915_REQUEST_SENTINEL)))
1213                 return false;
1214
1215         if (!can_merge_ctx(prev->hw_context, next->hw_context))
1216                 return false;
1217
1218         return true;
1219 }
1220
1221 static void virtual_update_register_offsets(u32 *regs,
1222                                             struct intel_engine_cs *engine)
1223 {
1224         set_offsets(regs, reg_offsets(engine), engine);
1225 }
1226
1227 static bool virtual_matches(const struct virtual_engine *ve,
1228                             const struct i915_request *rq,
1229                             const struct intel_engine_cs *engine)
1230 {
1231         const struct intel_engine_cs *inflight;
1232
1233         if (!(rq->execution_mask & engine->mask)) /* We peeked too soon! */
1234                 return false;
1235
1236         /*
1237          * We track when the HW has completed saving the context image
1238          * (i.e. when we have seen the final CS event switching out of
1239          * the context) and must not overwrite the context image before
1240          * then. This restricts us to only using the active engine
1241          * while the previous virtualized request is inflight (so
1242          * we reuse the register offsets). This is a very small
1243          * hystersis on the greedy seelction algorithm.
1244          */
1245         inflight = intel_context_inflight(&ve->context);
1246         if (inflight && inflight != engine)
1247                 return false;
1248
1249         return true;
1250 }
1251
1252 static void virtual_xfer_breadcrumbs(struct virtual_engine *ve,
1253                                      struct intel_engine_cs *engine)
1254 {
1255         struct intel_engine_cs *old = ve->siblings[0];
1256
1257         /* All unattached (rq->engine == old) must already be completed */
1258
1259         spin_lock(&old->breadcrumbs.irq_lock);
1260         if (!list_empty(&ve->context.signal_link)) {
1261                 list_move_tail(&ve->context.signal_link,
1262                                &engine->breadcrumbs.signalers);
1263                 intel_engine_queue_breadcrumbs(engine);
1264         }
1265         spin_unlock(&old->breadcrumbs.irq_lock);
1266 }
1267
1268 static struct i915_request *
1269 last_active(const struct intel_engine_execlists *execlists)
1270 {
1271         struct i915_request * const *last = READ_ONCE(execlists->active);
1272
1273         while (*last && i915_request_completed(*last))
1274                 last++;
1275
1276         return *last;
1277 }
1278
1279 static void defer_request(struct i915_request *rq, struct list_head * const pl)
1280 {
1281         LIST_HEAD(list);
1282
1283         /*
1284          * We want to move the interrupted request to the back of
1285          * the round-robin list (i.e. its priority level), but
1286          * in doing so, we must then move all requests that were in
1287          * flight and were waiting for the interrupted request to
1288          * be run after it again.
1289          */
1290         do {
1291                 struct i915_dependency *p;
1292
1293                 GEM_BUG_ON(i915_request_is_active(rq));
1294                 list_move_tail(&rq->sched.link, pl);
1295
1296                 list_for_each_entry(p, &rq->sched.waiters_list, wait_link) {
1297                         struct i915_request *w =
1298                                 container_of(p->waiter, typeof(*w), sched);
1299
1300                         /* Leave semaphores spinning on the other engines */
1301                         if (w->engine != rq->engine)
1302                                 continue;
1303
1304                         /* No waiter should start before its signaler */
1305                         GEM_BUG_ON(i915_request_started(w) &&
1306                                    !i915_request_completed(rq));
1307
1308                         GEM_BUG_ON(i915_request_is_active(w));
1309                         if (list_empty(&w->sched.link))
1310                                 continue; /* Not yet submitted; unready */
1311
1312                         if (rq_prio(w) < rq_prio(rq))
1313                                 continue;
1314
1315                         GEM_BUG_ON(rq_prio(w) > rq_prio(rq));
1316                         list_move_tail(&w->sched.link, &list);
1317                 }
1318
1319                 rq = list_first_entry_or_null(&list, typeof(*rq), sched.link);
1320         } while (rq);
1321 }
1322
1323 static void defer_active(struct intel_engine_cs *engine)
1324 {
1325         struct i915_request *rq;
1326
1327         rq = __unwind_incomplete_requests(engine);
1328         if (!rq)
1329                 return;
1330
1331         defer_request(rq, i915_sched_lookup_priolist(engine, rq_prio(rq)));
1332 }
1333
1334 static bool
1335 need_timeslice(struct intel_engine_cs *engine, const struct i915_request *rq)
1336 {
1337         int hint;
1338
1339         if (!intel_engine_has_semaphores(engine))
1340                 return false;
1341
1342         if (list_is_last(&rq->sched.link, &engine->active.requests))
1343                 return false;
1344
1345         hint = max(rq_prio(list_next_entry(rq, sched.link)),
1346                    engine->execlists.queue_priority_hint);
1347
1348         return hint >= effective_prio(rq);
1349 }
1350
1351 static int
1352 switch_prio(struct intel_engine_cs *engine, const struct i915_request *rq)
1353 {
1354         if (list_is_last(&rq->sched.link, &engine->active.requests))
1355                 return INT_MIN;
1356
1357         return rq_prio(list_next_entry(rq, sched.link));
1358 }
1359
1360 static bool
1361 enable_timeslice(const struct intel_engine_execlists *execlists)
1362 {
1363         const struct i915_request *rq = *execlists->active;
1364
1365         if (i915_request_completed(rq))
1366                 return false;
1367
1368         return execlists->switch_priority_hint >= effective_prio(rq);
1369 }
1370
1371 static void record_preemption(struct intel_engine_execlists *execlists)
1372 {
1373         (void)I915_SELFTEST_ONLY(execlists->preempt_hang.count++);
1374 }
1375
1376 static void execlists_dequeue(struct intel_engine_cs *engine)
1377 {
1378         struct intel_engine_execlists * const execlists = &engine->execlists;
1379         struct i915_request **port = execlists->pending;
1380         struct i915_request ** const last_port = port + execlists->port_mask;
1381         struct i915_request *last;
1382         struct rb_node *rb;
1383         bool submit = false;
1384
1385         /*
1386          * Hardware submission is through 2 ports. Conceptually each port
1387          * has a (RING_START, RING_HEAD, RING_TAIL) tuple. RING_START is
1388          * static for a context, and unique to each, so we only execute
1389          * requests belonging to a single context from each ring. RING_HEAD
1390          * is maintained by the CS in the context image, it marks the place
1391          * where it got up to last time, and through RING_TAIL we tell the CS
1392          * where we want to execute up to this time.
1393          *
1394          * In this list the requests are in order of execution. Consecutive
1395          * requests from the same context are adjacent in the ringbuffer. We
1396          * can combine these requests into a single RING_TAIL update:
1397          *
1398          *              RING_HEAD...req1...req2
1399          *                                    ^- RING_TAIL
1400          * since to execute req2 the CS must first execute req1.
1401          *
1402          * Our goal then is to point each port to the end of a consecutive
1403          * sequence of requests as being the most optimal (fewest wake ups
1404          * and context switches) submission.
1405          */
1406
1407         for (rb = rb_first_cached(&execlists->virtual); rb; ) {
1408                 struct virtual_engine *ve =
1409                         rb_entry(rb, typeof(*ve), nodes[engine->id].rb);
1410                 struct i915_request *rq = READ_ONCE(ve->request);
1411
1412                 if (!rq) { /* lazily cleanup after another engine handled rq */
1413                         rb_erase_cached(rb, &execlists->virtual);
1414                         RB_CLEAR_NODE(rb);
1415                         rb = rb_first_cached(&execlists->virtual);
1416                         continue;
1417                 }
1418
1419                 if (!virtual_matches(ve, rq, engine)) {
1420                         rb = rb_next(rb);
1421                         continue;
1422                 }
1423
1424                 break;
1425         }
1426
1427         /*
1428          * If the queue is higher priority than the last
1429          * request in the currently active context, submit afresh.
1430          * We will resubmit again afterwards in case we need to split
1431          * the active context to interject the preemption request,
1432          * i.e. we will retrigger preemption following the ack in case
1433          * of trouble.
1434          */
1435         last = last_active(execlists);
1436         if (last) {
1437                 if (need_preempt(engine, last, rb)) {
1438                         GEM_TRACE("%s: preempting last=%llx:%lld, prio=%d, hint=%d\n",
1439                                   engine->name,
1440                                   last->fence.context,
1441                                   last->fence.seqno,
1442                                   last->sched.attr.priority,
1443                                   execlists->queue_priority_hint);
1444                         record_preemption(execlists);
1445
1446                         /*
1447                          * Don't let the RING_HEAD advance past the breadcrumb
1448                          * as we unwind (and until we resubmit) so that we do
1449                          * not accidentally tell it to go backwards.
1450                          */
1451                         ring_set_paused(engine, 1);
1452
1453                         /*
1454                          * Note that we have not stopped the GPU at this point,
1455                          * so we are unwinding the incomplete requests as they
1456                          * remain inflight and so by the time we do complete
1457                          * the preemption, some of the unwound requests may
1458                          * complete!
1459                          */
1460                         __unwind_incomplete_requests(engine);
1461
1462                         /*
1463                          * If we need to return to the preempted context, we
1464                          * need to skip the lite-restore and force it to
1465                          * reload the RING_TAIL. Otherwise, the HW has a
1466                          * tendency to ignore us rewinding the TAIL to the
1467                          * end of an earlier request.
1468                          */
1469                         last->hw_context->lrc_desc |= CTX_DESC_FORCE_RESTORE;
1470                         last = NULL;
1471                 } else if (need_timeslice(engine, last) &&
1472                            !timer_pending(&engine->execlists.timer)) {
1473                         GEM_TRACE("%s: expired last=%llx:%lld, prio=%d, hint=%d\n",
1474                                   engine->name,
1475                                   last->fence.context,
1476                                   last->fence.seqno,
1477                                   last->sched.attr.priority,
1478                                   execlists->queue_priority_hint);
1479
1480                         ring_set_paused(engine, 1);
1481                         defer_active(engine);
1482
1483                         /*
1484                          * Unlike for preemption, if we rewind and continue
1485                          * executing the same context as previously active,
1486                          * the order of execution will remain the same and
1487                          * the tail will only advance. We do not need to
1488                          * force a full context restore, as a lite-restore
1489                          * is sufficient to resample the monotonic TAIL.
1490                          *
1491                          * If we switch to any other context, similarly we
1492                          * will not rewind TAIL of current context, and
1493                          * normal save/restore will preserve state and allow
1494                          * us to later continue executing the same request.
1495                          */
1496                         last = NULL;
1497                 } else {
1498                         /*
1499                          * Otherwise if we already have a request pending
1500                          * for execution after the current one, we can
1501                          * just wait until the next CS event before
1502                          * queuing more. In either case we will force a
1503                          * lite-restore preemption event, but if we wait
1504                          * we hopefully coalesce several updates into a single
1505                          * submission.
1506                          */
1507                         if (!list_is_last(&last->sched.link,
1508                                           &engine->active.requests))
1509                                 return;
1510
1511                         /*
1512                          * WaIdleLiteRestore:bdw,skl
1513                          * Apply the wa NOOPs to prevent
1514                          * ring:HEAD == rq:TAIL as we resubmit the
1515                          * request. See gen8_emit_fini_breadcrumb() for
1516                          * where we prepare the padding after the
1517                          * end of the request.
1518                          */
1519                         last->tail = last->wa_tail;
1520                 }
1521         }
1522
1523         while (rb) { /* XXX virtual is always taking precedence */
1524                 struct virtual_engine *ve =
1525                         rb_entry(rb, typeof(*ve), nodes[engine->id].rb);
1526                 struct i915_request *rq;
1527
1528                 spin_lock(&ve->base.active.lock);
1529
1530                 rq = ve->request;
1531                 if (unlikely(!rq)) { /* lost the race to a sibling */
1532                         spin_unlock(&ve->base.active.lock);
1533                         rb_erase_cached(rb, &execlists->virtual);
1534                         RB_CLEAR_NODE(rb);
1535                         rb = rb_first_cached(&execlists->virtual);
1536                         continue;
1537                 }
1538
1539                 GEM_BUG_ON(rq != ve->request);
1540                 GEM_BUG_ON(rq->engine != &ve->base);
1541                 GEM_BUG_ON(rq->hw_context != &ve->context);
1542
1543                 if (rq_prio(rq) >= queue_prio(execlists)) {
1544                         if (!virtual_matches(ve, rq, engine)) {
1545                                 spin_unlock(&ve->base.active.lock);
1546                                 rb = rb_next(rb);
1547                                 continue;
1548                         }
1549
1550                         if (last && !can_merge_rq(last, rq)) {
1551                                 spin_unlock(&ve->base.active.lock);
1552                                 return; /* leave this for another */
1553                         }
1554
1555                         GEM_TRACE("%s: virtual rq=%llx:%lld%s, new engine? %s\n",
1556                                   engine->name,
1557                                   rq->fence.context,
1558                                   rq->fence.seqno,
1559                                   i915_request_completed(rq) ? "!" :
1560                                   i915_request_started(rq) ? "*" :
1561                                   "",
1562                                   yesno(engine != ve->siblings[0]));
1563
1564                         ve->request = NULL;
1565                         ve->base.execlists.queue_priority_hint = INT_MIN;
1566                         rb_erase_cached(rb, &execlists->virtual);
1567                         RB_CLEAR_NODE(rb);
1568
1569                         GEM_BUG_ON(!(rq->execution_mask & engine->mask));
1570                         rq->engine = engine;
1571
1572                         if (engine != ve->siblings[0]) {
1573                                 u32 *regs = ve->context.lrc_reg_state;
1574                                 unsigned int n;
1575
1576                                 GEM_BUG_ON(READ_ONCE(ve->context.inflight));
1577
1578                                 if (!intel_engine_has_relative_mmio(engine))
1579                                         virtual_update_register_offsets(regs,
1580                                                                         engine);
1581
1582                                 if (!list_empty(&ve->context.signals))
1583                                         virtual_xfer_breadcrumbs(ve, engine);
1584
1585                                 /*
1586                                  * Move the bound engine to the top of the list
1587                                  * for future execution. We then kick this
1588                                  * tasklet first before checking others, so that
1589                                  * we preferentially reuse this set of bound
1590                                  * registers.
1591                                  */
1592                                 for (n = 1; n < ve->num_siblings; n++) {
1593                                         if (ve->siblings[n] == engine) {
1594                                                 swap(ve->siblings[n],
1595                                                      ve->siblings[0]);
1596                                                 break;
1597                                         }
1598                                 }
1599
1600                                 GEM_BUG_ON(ve->siblings[0] != engine);
1601                         }
1602
1603                         if (__i915_request_submit(rq)) {
1604                                 submit = true;
1605                                 last = rq;
1606                         }
1607                         i915_request_put(rq);
1608
1609                         /*
1610                          * Hmm, we have a bunch of virtual engine requests,
1611                          * but the first one was already completed (thanks
1612                          * preempt-to-busy!). Keep looking at the veng queue
1613                          * until we have no more relevant requests (i.e.
1614                          * the normal submit queue has higher priority).
1615                          */
1616                         if (!submit) {
1617                                 spin_unlock(&ve->base.active.lock);
1618                                 rb = rb_first_cached(&execlists->virtual);
1619                                 continue;
1620                         }
1621                 }
1622
1623                 spin_unlock(&ve->base.active.lock);
1624                 break;
1625         }
1626
1627         while ((rb = rb_first_cached(&execlists->queue))) {
1628                 struct i915_priolist *p = to_priolist(rb);
1629                 struct i915_request *rq, *rn;
1630                 int i;
1631
1632                 priolist_for_each_request_consume(rq, rn, p, i) {
1633                         bool merge = true;
1634
1635                         /*
1636                          * Can we combine this request with the current port?
1637                          * It has to be the same context/ringbuffer and not
1638                          * have any exceptions (e.g. GVT saying never to
1639                          * combine contexts).
1640                          *
1641                          * If we can combine the requests, we can execute both
1642                          * by updating the RING_TAIL to point to the end of the
1643                          * second request, and so we never need to tell the
1644                          * hardware about the first.
1645                          */
1646                         if (last && !can_merge_rq(last, rq)) {
1647                                 /*
1648                                  * If we are on the second port and cannot
1649                                  * combine this request with the last, then we
1650                                  * are done.
1651                                  */
1652                                 if (port == last_port)
1653                                         goto done;
1654
1655                                 /*
1656                                  * We must not populate both ELSP[] with the
1657                                  * same LRCA, i.e. we must submit 2 different
1658                                  * contexts if we submit 2 ELSP.
1659                                  */
1660                                 if (last->hw_context == rq->hw_context)
1661                                         goto done;
1662
1663                                 if (i915_request_has_sentinel(last))
1664                                         goto done;
1665
1666                                 /*
1667                                  * If GVT overrides us we only ever submit
1668                                  * port[0], leaving port[1] empty. Note that we
1669                                  * also have to be careful that we don't queue
1670                                  * the same context (even though a different
1671                                  * request) to the second port.
1672                                  */
1673                                 if (ctx_single_port_submission(last->hw_context) ||
1674                                     ctx_single_port_submission(rq->hw_context))
1675                                         goto done;
1676
1677                                 merge = false;
1678                         }
1679
1680                         if (__i915_request_submit(rq)) {
1681                                 if (!merge) {
1682                                         *port = execlists_schedule_in(last, port - execlists->pending);
1683                                         port++;
1684                                         last = NULL;
1685                                 }
1686
1687                                 GEM_BUG_ON(last &&
1688                                            !can_merge_ctx(last->hw_context,
1689                                                           rq->hw_context));
1690
1691                                 submit = true;
1692                                 last = rq;
1693                         }
1694                 }
1695
1696                 rb_erase_cached(&p->node, &execlists->queue);
1697                 i915_priolist_free(p);
1698         }
1699
1700 done:
1701         /*
1702          * Here be a bit of magic! Or sleight-of-hand, whichever you prefer.
1703          *
1704          * We choose the priority hint such that if we add a request of greater
1705          * priority than this, we kick the submission tasklet to decide on
1706          * the right order of submitting the requests to hardware. We must
1707          * also be prepared to reorder requests as they are in-flight on the
1708          * HW. We derive the priority hint then as the first "hole" in
1709          * the HW submission ports and if there are no available slots,
1710          * the priority of the lowest executing request, i.e. last.
1711          *
1712          * When we do receive a higher priority request ready to run from the
1713          * user, see queue_request(), the priority hint is bumped to that
1714          * request triggering preemption on the next dequeue (or subsequent
1715          * interrupt for secondary ports).
1716          */
1717         execlists->queue_priority_hint = queue_prio(execlists);
1718         GEM_TRACE("%s: queue_priority_hint:%d, submit:%s\n",
1719                   engine->name, execlists->queue_priority_hint,
1720                   yesno(submit));
1721
1722         if (submit) {
1723                 *port = execlists_schedule_in(last, port - execlists->pending);
1724                 execlists->switch_priority_hint =
1725                         switch_prio(engine, *execlists->pending);
1726
1727                 /*
1728                  * Skip if we ended up with exactly the same set of requests,
1729                  * e.g. trying to timeslice a pair of ordered contexts
1730                  */
1731                 if (!memcmp(execlists->active, execlists->pending,
1732                             (port - execlists->pending + 1) * sizeof(*port))) {
1733                         do
1734                                 execlists_schedule_out(fetch_and_zero(port));
1735                         while (port-- != execlists->pending);
1736
1737                         goto skip_submit;
1738                 }
1739
1740                 memset(port + 1, 0, (last_port - port) * sizeof(*port));
1741                 execlists_submit_ports(engine);
1742         } else {
1743 skip_submit:
1744                 ring_set_paused(engine, 0);
1745         }
1746 }
1747
1748 static void
1749 cancel_port_requests(struct intel_engine_execlists * const execlists)
1750 {
1751         struct i915_request * const *port, *rq;
1752
1753         for (port = execlists->pending; (rq = *port); port++)
1754                 execlists_schedule_out(rq);
1755         memset(execlists->pending, 0, sizeof(execlists->pending));
1756
1757         for (port = execlists->active; (rq = *port); port++)
1758                 execlists_schedule_out(rq);
1759         execlists->active =
1760                 memset(execlists->inflight, 0, sizeof(execlists->inflight));
1761 }
1762
1763 static inline void
1764 invalidate_csb_entries(const u32 *first, const u32 *last)
1765 {
1766         clflush((void *)first);
1767         clflush((void *)last);
1768 }
1769
1770 static inline bool
1771 reset_in_progress(const struct intel_engine_execlists *execlists)
1772 {
1773         return unlikely(!__tasklet_is_enabled(&execlists->tasklet));
1774 }
1775
1776 /*
1777  * Starting with Gen12, the status has a new format:
1778  *
1779  *     bit  0:     switched to new queue
1780  *     bit  1:     reserved
1781  *     bit  2:     semaphore wait mode (poll or signal), only valid when
1782  *                 switch detail is set to "wait on semaphore"
1783  *     bits 3-5:   engine class
1784  *     bits 6-11:  engine instance
1785  *     bits 12-14: reserved
1786  *     bits 15-25: sw context id of the lrc the GT switched to
1787  *     bits 26-31: sw counter of the lrc the GT switched to
1788  *     bits 32-35: context switch detail
1789  *                  - 0: ctx complete
1790  *                  - 1: wait on sync flip
1791  *                  - 2: wait on vblank
1792  *                  - 3: wait on scanline
1793  *                  - 4: wait on semaphore
1794  *                  - 5: context preempted (not on SEMAPHORE_WAIT or
1795  *                       WAIT_FOR_EVENT)
1796  *     bit  36:    reserved
1797  *     bits 37-43: wait detail (for switch detail 1 to 4)
1798  *     bits 44-46: reserved
1799  *     bits 47-57: sw context id of the lrc the GT switched away from
1800  *     bits 58-63: sw counter of the lrc the GT switched away from
1801  */
1802 static inline bool
1803 gen12_csb_parse(const struct intel_engine_execlists *execlists, const u32 *csb)
1804 {
1805         u32 lower_dw = csb[0];
1806         u32 upper_dw = csb[1];
1807         bool ctx_to_valid = GEN12_CSB_CTX_VALID(lower_dw);
1808         bool ctx_away_valid = GEN12_CSB_CTX_VALID(upper_dw);
1809         bool new_queue = lower_dw & GEN12_CTX_STATUS_SWITCHED_TO_NEW_QUEUE;
1810
1811         /*
1812          * The context switch detail is not guaranteed to be 5 when a preemption
1813          * occurs, so we can't just check for that. The check below works for
1814          * all the cases we care about, including preemptions of WAIT
1815          * instructions and lite-restore. Preempt-to-idle via the CTRL register
1816          * would require some extra handling, but we don't support that.
1817          */
1818         if (!ctx_away_valid || new_queue) {
1819                 GEM_BUG_ON(!ctx_to_valid);
1820                 return true;
1821         }
1822
1823         /*
1824          * switch detail = 5 is covered by the case above and we do not expect a
1825          * context switch on an unsuccessful wait instruction since we always
1826          * use polling mode.
1827          */
1828         GEM_BUG_ON(GEN12_CTX_SWITCH_DETAIL(upper_dw));
1829         return false;
1830 }
1831
1832 static inline bool
1833 gen8_csb_parse(const struct intel_engine_execlists *execlists, const u32 *csb)
1834 {
1835         return *csb & (GEN8_CTX_STATUS_IDLE_ACTIVE | GEN8_CTX_STATUS_PREEMPTED);
1836 }
1837
1838 static void process_csb(struct intel_engine_cs *engine)
1839 {
1840         struct intel_engine_execlists * const execlists = &engine->execlists;
1841         const u32 * const buf = execlists->csb_status;
1842         const u8 num_entries = execlists->csb_size;
1843         u8 head, tail;
1844
1845         /*
1846          * As we modify our execlists state tracking we require exclusive
1847          * access. Either we are inside the tasklet, or the tasklet is disabled
1848          * and we assume that is only inside the reset paths and so serialised.
1849          */
1850         GEM_BUG_ON(!tasklet_is_locked(&execlists->tasklet) &&
1851                    !reset_in_progress(execlists));
1852         GEM_BUG_ON(USES_GUC_SUBMISSION(engine->i915));
1853
1854         /*
1855          * Note that csb_write, csb_status may be either in HWSP or mmio.
1856          * When reading from the csb_write mmio register, we have to be
1857          * careful to only use the GEN8_CSB_WRITE_PTR portion, which is
1858          * the low 4bits. As it happens we know the next 4bits are always
1859          * zero and so we can simply masked off the low u8 of the register
1860          * and treat it identically to reading from the HWSP (without having
1861          * to use explicit shifting and masking, and probably bifurcating
1862          * the code to handle the legacy mmio read).
1863          */
1864         head = execlists->csb_head;
1865         tail = READ_ONCE(*execlists->csb_write);
1866         GEM_TRACE("%s cs-irq head=%d, tail=%d\n", engine->name, head, tail);
1867         if (unlikely(head == tail))
1868                 return;
1869
1870         /*
1871          * Hopefully paired with a wmb() in HW!
1872          *
1873          * We must complete the read of the write pointer before any reads
1874          * from the CSB, so that we do not see stale values. Without an rmb
1875          * (lfence) the HW may speculatively perform the CSB[] reads *before*
1876          * we perform the READ_ONCE(*csb_write).
1877          */
1878         rmb();
1879
1880         do {
1881                 bool promote;
1882
1883                 if (++head == num_entries)
1884                         head = 0;
1885
1886                 /*
1887                  * We are flying near dragons again.
1888                  *
1889                  * We hold a reference to the request in execlist_port[]
1890                  * but no more than that. We are operating in softirq
1891                  * context and so cannot hold any mutex or sleep. That
1892                  * prevents us stopping the requests we are processing
1893                  * in port[] from being retired simultaneously (the
1894                  * breadcrumb will be complete before we see the
1895                  * context-switch). As we only hold the reference to the
1896                  * request, any pointer chasing underneath the request
1897                  * is subject to a potential use-after-free. Thus we
1898                  * store all of the bookkeeping within port[] as
1899                  * required, and avoid using unguarded pointers beneath
1900                  * request itself. The same applies to the atomic
1901                  * status notifier.
1902                  */
1903
1904                 GEM_TRACE("%s csb[%d]: status=0x%08x:0x%08x\n",
1905                           engine->name, head,
1906                           buf[2 * head + 0], buf[2 * head + 1]);
1907
1908                 if (INTEL_GEN(engine->i915) >= 12)
1909                         promote = gen12_csb_parse(execlists, buf + 2 * head);
1910                 else
1911                         promote = gen8_csb_parse(execlists, buf + 2 * head);
1912                 if (promote) {
1913                         /* cancel old inflight, prepare for switch */
1914                         trace_ports(execlists, "preempted", execlists->active);
1915                         while (*execlists->active)
1916                                 execlists_schedule_out(*execlists->active++);
1917
1918                         /* switch pending to inflight */
1919                         GEM_BUG_ON(!assert_pending_valid(execlists, "promote"));
1920                         execlists->active =
1921                                 memcpy(execlists->inflight,
1922                                        execlists->pending,
1923                                        execlists_num_ports(execlists) *
1924                                        sizeof(*execlists->pending));
1925
1926                         if (enable_timeslice(execlists))
1927                                 mod_timer(&execlists->timer, jiffies + 1);
1928
1929                         if (!inject_preempt_hang(execlists))
1930                                 ring_set_paused(engine, 0);
1931
1932                         WRITE_ONCE(execlists->pending[0], NULL);
1933                 } else {
1934                         GEM_BUG_ON(!*execlists->active);
1935
1936                         /* port0 completed, advanced to port1 */
1937                         trace_ports(execlists, "completed", execlists->active);
1938
1939                         /*
1940                          * We rely on the hardware being strongly
1941                          * ordered, that the breadcrumb write is
1942                          * coherent (visible from the CPU) before the
1943                          * user interrupt and CSB is processed.
1944                          */
1945                         GEM_BUG_ON(!i915_request_completed(*execlists->active) &&
1946                                    !reset_in_progress(execlists));
1947                         execlists_schedule_out(*execlists->active++);
1948
1949                         GEM_BUG_ON(execlists->active - execlists->inflight >
1950                                    execlists_num_ports(execlists));
1951                 }
1952         } while (head != tail);
1953
1954         execlists->csb_head = head;
1955
1956         /*
1957          * Gen11 has proven to fail wrt global observation point between
1958          * entry and tail update, failing on the ordering and thus
1959          * we see an old entry in the context status buffer.
1960          *
1961          * Forcibly evict out entries for the next gpu csb update,
1962          * to increase the odds that we get a fresh entries with non
1963          * working hardware. The cost for doing so comes out mostly with
1964          * the wash as hardware, working or not, will need to do the
1965          * invalidation before.
1966          */
1967         invalidate_csb_entries(&buf[0], &buf[num_entries - 1]);
1968 }
1969
1970 static void __execlists_submission_tasklet(struct intel_engine_cs *const engine)
1971 {
1972         lockdep_assert_held(&engine->active.lock);
1973         if (!engine->execlists.pending[0]) {
1974                 rcu_read_lock(); /* protect peeking at execlists->active */
1975                 execlists_dequeue(engine);
1976                 rcu_read_unlock();
1977         }
1978 }
1979
1980 /*
1981  * Check the unread Context Status Buffers and manage the submission of new
1982  * contexts to the ELSP accordingly.
1983  */
1984 static void execlists_submission_tasklet(unsigned long data)
1985 {
1986         struct intel_engine_cs * const engine = (struct intel_engine_cs *)data;
1987         unsigned long flags;
1988
1989         process_csb(engine);
1990         if (!READ_ONCE(engine->execlists.pending[0])) {
1991                 spin_lock_irqsave(&engine->active.lock, flags);
1992                 __execlists_submission_tasklet(engine);
1993                 spin_unlock_irqrestore(&engine->active.lock, flags);
1994         }
1995 }
1996
1997 static void execlists_submission_timer(struct timer_list *timer)
1998 {
1999         struct intel_engine_cs *engine =
2000                 from_timer(engine, timer, execlists.timer);
2001
2002         /* Kick the tasklet for some interrupt coalescing and reset handling */
2003         tasklet_hi_schedule(&engine->execlists.tasklet);
2004 }
2005
2006 static void queue_request(struct intel_engine_cs *engine,
2007                           struct i915_sched_node *node,
2008                           int prio)
2009 {
2010         GEM_BUG_ON(!list_empty(&node->link));
2011         list_add_tail(&node->link, i915_sched_lookup_priolist(engine, prio));
2012 }
2013
2014 static void __submit_queue_imm(struct intel_engine_cs *engine)
2015 {
2016         struct intel_engine_execlists * const execlists = &engine->execlists;
2017
2018         if (reset_in_progress(execlists))
2019                 return; /* defer until we restart the engine following reset */
2020
2021         if (execlists->tasklet.func == execlists_submission_tasklet)
2022                 __execlists_submission_tasklet(engine);
2023         else
2024                 tasklet_hi_schedule(&execlists->tasklet);
2025 }
2026
2027 static void submit_queue(struct intel_engine_cs *engine,
2028                          const struct i915_request *rq)
2029 {
2030         struct intel_engine_execlists *execlists = &engine->execlists;
2031
2032         if (rq_prio(rq) <= execlists->queue_priority_hint)
2033                 return;
2034
2035         execlists->queue_priority_hint = rq_prio(rq);
2036         __submit_queue_imm(engine);
2037 }
2038
2039 static void execlists_submit_request(struct i915_request *request)
2040 {
2041         struct intel_engine_cs *engine = request->engine;
2042         unsigned long flags;
2043
2044         /* Will be called from irq-context when using foreign fences. */
2045         spin_lock_irqsave(&engine->active.lock, flags);
2046
2047         queue_request(engine, &request->sched, rq_prio(request));
2048
2049         GEM_BUG_ON(RB_EMPTY_ROOT(&engine->execlists.queue.rb_root));
2050         GEM_BUG_ON(list_empty(&request->sched.link));
2051
2052         submit_queue(engine, request);
2053
2054         spin_unlock_irqrestore(&engine->active.lock, flags);
2055 }
2056
2057 static void __execlists_context_fini(struct intel_context *ce)
2058 {
2059         intel_ring_put(ce->ring);
2060         i915_vma_put(ce->state);
2061 }
2062
2063 static void execlists_context_destroy(struct kref *kref)
2064 {
2065         struct intel_context *ce = container_of(kref, typeof(*ce), ref);
2066
2067         GEM_BUG_ON(!i915_active_is_idle(&ce->active));
2068         GEM_BUG_ON(intel_context_is_pinned(ce));
2069
2070         if (ce->state)
2071                 __execlists_context_fini(ce);
2072
2073         intel_context_fini(ce);
2074         intel_context_free(ce);
2075 }
2076
2077 static void
2078 set_redzone(void *vaddr, const struct intel_engine_cs *engine)
2079 {
2080         if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
2081                 return;
2082
2083         vaddr += LRC_HEADER_PAGES * PAGE_SIZE;
2084         vaddr += engine->context_size;
2085
2086         memset(vaddr, POISON_INUSE, I915_GTT_PAGE_SIZE);
2087 }
2088
2089 static void
2090 check_redzone(const void *vaddr, const struct intel_engine_cs *engine)
2091 {
2092         if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
2093                 return;
2094
2095         vaddr += LRC_HEADER_PAGES * PAGE_SIZE;
2096         vaddr += engine->context_size;
2097
2098         if (memchr_inv(vaddr, POISON_INUSE, I915_GTT_PAGE_SIZE))
2099                 dev_err_once(engine->i915->drm.dev,
2100                              "%s context redzone overwritten!\n",
2101                              engine->name);
2102 }
2103
2104 static void execlists_context_unpin(struct intel_context *ce)
2105 {
2106         check_redzone((void *)ce->lrc_reg_state - LRC_STATE_PN * PAGE_SIZE,
2107                       ce->engine);
2108
2109         i915_gem_object_unpin_map(ce->state->obj);
2110         intel_ring_reset(ce->ring, ce->ring->tail);
2111 }
2112
2113 static void
2114 __execlists_update_reg_state(const struct intel_context *ce,
2115                              const struct intel_engine_cs *engine)
2116 {
2117         struct intel_ring *ring = ce->ring;
2118         u32 *regs = ce->lrc_reg_state;
2119
2120         GEM_BUG_ON(!intel_ring_offset_valid(ring, ring->head));
2121         GEM_BUG_ON(!intel_ring_offset_valid(ring, ring->tail));
2122
2123         regs[CTX_RING_BUFFER_START] = i915_ggtt_offset(ring->vma);
2124         regs[CTX_RING_HEAD] = ring->head;
2125         regs[CTX_RING_TAIL] = ring->tail;
2126
2127         /* RPCS */
2128         if (engine->class == RENDER_CLASS) {
2129                 regs[CTX_R_PWR_CLK_STATE] =
2130                         intel_sseu_make_rpcs(engine->i915, &ce->sseu);
2131
2132                 i915_oa_init_reg_state(ce, engine);
2133         }
2134 }
2135
2136 static int
2137 __execlists_context_pin(struct intel_context *ce,
2138                         struct intel_engine_cs *engine)
2139 {
2140         void *vaddr;
2141         int ret;
2142
2143         GEM_BUG_ON(!ce->state);
2144
2145         ret = intel_context_active_acquire(ce);
2146         if (ret)
2147                 goto err;
2148         GEM_BUG_ON(!i915_vma_is_pinned(ce->state));
2149
2150         vaddr = i915_gem_object_pin_map(ce->state->obj,
2151                                         i915_coherent_map_type(engine->i915) |
2152                                         I915_MAP_OVERRIDE);
2153         if (IS_ERR(vaddr)) {
2154                 ret = PTR_ERR(vaddr);
2155                 goto unpin_active;
2156         }
2157
2158         ce->lrc_desc = lrc_descriptor(ce, engine);
2159         ce->lrc_reg_state = vaddr + LRC_STATE_PN * PAGE_SIZE;
2160         __execlists_update_reg_state(ce, engine);
2161
2162         return 0;
2163
2164 unpin_active:
2165         intel_context_active_release(ce);
2166 err:
2167         return ret;
2168 }
2169
2170 static int execlists_context_pin(struct intel_context *ce)
2171 {
2172         return __execlists_context_pin(ce, ce->engine);
2173 }
2174
2175 static int execlists_context_alloc(struct intel_context *ce)
2176 {
2177         return __execlists_context_alloc(ce, ce->engine);
2178 }
2179
2180 static void execlists_context_reset(struct intel_context *ce)
2181 {
2182         /*
2183          * Because we emit WA_TAIL_DWORDS there may be a disparity
2184          * between our bookkeeping in ce->ring->head and ce->ring->tail and
2185          * that stored in context. As we only write new commands from
2186          * ce->ring->tail onwards, everything before that is junk. If the GPU
2187          * starts reading from its RING_HEAD from the context, it may try to
2188          * execute that junk and die.
2189          *
2190          * The contexts that are stilled pinned on resume belong to the
2191          * kernel, and are local to each engine. All other contexts will
2192          * have their head/tail sanitized upon pinning before use, so they
2193          * will never see garbage,
2194          *
2195          * So to avoid that we reset the context images upon resume. For
2196          * simplicity, we just zero everything out.
2197          */
2198         intel_ring_reset(ce->ring, 0);
2199         __execlists_update_reg_state(ce, ce->engine);
2200 }
2201
2202 static const struct intel_context_ops execlists_context_ops = {
2203         .alloc = execlists_context_alloc,
2204
2205         .pin = execlists_context_pin,
2206         .unpin = execlists_context_unpin,
2207
2208         .enter = intel_context_enter_engine,
2209         .exit = intel_context_exit_engine,
2210
2211         .reset = execlists_context_reset,
2212         .destroy = execlists_context_destroy,
2213 };
2214
2215 static int gen8_emit_init_breadcrumb(struct i915_request *rq)
2216 {
2217         u32 *cs;
2218
2219         GEM_BUG_ON(!i915_request_timeline(rq)->has_initial_breadcrumb);
2220
2221         cs = intel_ring_begin(rq, 6);
2222         if (IS_ERR(cs))
2223                 return PTR_ERR(cs);
2224
2225         /*
2226          * Check if we have been preempted before we even get started.
2227          *
2228          * After this point i915_request_started() reports true, even if
2229          * we get preempted and so are no longer running.
2230          */
2231         *cs++ = MI_ARB_CHECK;
2232         *cs++ = MI_NOOP;
2233
2234         *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
2235         *cs++ = i915_request_timeline(rq)->hwsp_offset;
2236         *cs++ = 0;
2237         *cs++ = rq->fence.seqno - 1;
2238
2239         intel_ring_advance(rq, cs);
2240
2241         /* Record the updated position of the request's payload */
2242         rq->infix = intel_ring_offset(rq, cs);
2243
2244         return 0;
2245 }
2246
2247 static int execlists_request_alloc(struct i915_request *request)
2248 {
2249         int ret;
2250
2251         GEM_BUG_ON(!intel_context_is_pinned(request->hw_context));
2252
2253         /*
2254          * Flush enough space to reduce the likelihood of waiting after
2255          * we start building the request - in which case we will just
2256          * have to repeat work.
2257          */
2258         request->reserved_space += EXECLISTS_REQUEST_SIZE;
2259
2260         /*
2261          * Note that after this point, we have committed to using
2262          * this request as it is being used to both track the
2263          * state of engine initialisation and liveness of the
2264          * golden renderstate above. Think twice before you try
2265          * to cancel/unwind this request now.
2266          */
2267
2268         /* Unconditionally invalidate GPU caches and TLBs. */
2269         ret = request->engine->emit_flush(request, EMIT_INVALIDATE);
2270         if (ret)
2271                 return ret;
2272
2273         request->reserved_space -= EXECLISTS_REQUEST_SIZE;
2274         return 0;
2275 }
2276
2277 /*
2278  * In this WA we need to set GEN8_L3SQCREG4[21:21] and reset it after
2279  * PIPE_CONTROL instruction. This is required for the flush to happen correctly
2280  * but there is a slight complication as this is applied in WA batch where the
2281  * values are only initialized once so we cannot take register value at the
2282  * beginning and reuse it further; hence we save its value to memory, upload a
2283  * constant value with bit21 set and then we restore it back with the saved value.
2284  * To simplify the WA, a constant value is formed by using the default value
2285  * of this register. This shouldn't be a problem because we are only modifying
2286  * it for a short period and this batch in non-premptible. We can ofcourse
2287  * use additional instructions that read the actual value of the register
2288  * at that time and set our bit of interest but it makes the WA complicated.
2289  *
2290  * This WA is also required for Gen9 so extracting as a function avoids
2291  * code duplication.
2292  */
2293 static u32 *
2294 gen8_emit_flush_coherentl3_wa(struct intel_engine_cs *engine, u32 *batch)
2295 {
2296         /* NB no one else is allowed to scribble over scratch + 256! */
2297         *batch++ = MI_STORE_REGISTER_MEM_GEN8 | MI_SRM_LRM_GLOBAL_GTT;
2298         *batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4);
2299         *batch++ = intel_gt_scratch_offset(engine->gt,
2300                                            INTEL_GT_SCRATCH_FIELD_COHERENTL3_WA);
2301         *batch++ = 0;
2302
2303         *batch++ = MI_LOAD_REGISTER_IMM(1);
2304         *batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4);
2305         *batch++ = 0x40400000 | GEN8_LQSC_FLUSH_COHERENT_LINES;
2306
2307         batch = gen8_emit_pipe_control(batch,
2308                                        PIPE_CONTROL_CS_STALL |
2309                                        PIPE_CONTROL_DC_FLUSH_ENABLE,
2310                                        0);
2311
2312         *batch++ = MI_LOAD_REGISTER_MEM_GEN8 | MI_SRM_LRM_GLOBAL_GTT;
2313         *batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4);
2314         *batch++ = intel_gt_scratch_offset(engine->gt,
2315                                            INTEL_GT_SCRATCH_FIELD_COHERENTL3_WA);
2316         *batch++ = 0;
2317
2318         return batch;
2319 }
2320
2321 /*
2322  * Typically we only have one indirect_ctx and per_ctx batch buffer which are
2323  * initialized at the beginning and shared across all contexts but this field
2324  * helps us to have multiple batches at different offsets and select them based
2325  * on a criteria. At the moment this batch always start at the beginning of the page
2326  * and at this point we don't have multiple wa_ctx batch buffers.
2327  *
2328  * The number of WA applied are not known at the beginning; we use this field
2329  * to return the no of DWORDS written.
2330  *
2331  * It is to be noted that this batch does not contain MI_BATCH_BUFFER_END
2332  * so it adds NOOPs as padding to make it cacheline aligned.
2333  * MI_BATCH_BUFFER_END will be added to perctx batch and both of them together
2334  * makes a complete batch buffer.
2335  */
2336 static u32 *gen8_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch)
2337 {
2338         /* WaDisableCtxRestoreArbitration:bdw,chv */
2339         *batch++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
2340
2341         /* WaFlushCoherentL3CacheLinesAtContextSwitch:bdw */
2342         if (IS_BROADWELL(engine->i915))
2343                 batch = gen8_emit_flush_coherentl3_wa(engine, batch);
2344
2345         /* WaClearSlmSpaceAtContextSwitch:bdw,chv */
2346         /* Actual scratch location is at 128 bytes offset */
2347         batch = gen8_emit_pipe_control(batch,
2348                                        PIPE_CONTROL_FLUSH_L3 |
2349                                        PIPE_CONTROL_STORE_DATA_INDEX |
2350                                        PIPE_CONTROL_CS_STALL |
2351                                        PIPE_CONTROL_QW_WRITE,
2352                                        LRC_PPHWSP_SCRATCH_ADDR);
2353
2354         *batch++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
2355
2356         /* Pad to end of cacheline */
2357         while ((unsigned long)batch % CACHELINE_BYTES)
2358                 *batch++ = MI_NOOP;
2359
2360         /*
2361          * MI_BATCH_BUFFER_END is not required in Indirect ctx BB because
2362          * execution depends on the length specified in terms of cache lines
2363          * in the register CTX_RCS_INDIRECT_CTX
2364          */
2365
2366         return batch;
2367 }
2368
2369 struct lri {
2370         i915_reg_t reg;
2371         u32 value;
2372 };
2373
2374 static u32 *emit_lri(u32 *batch, const struct lri *lri, unsigned int count)
2375 {
2376         GEM_BUG_ON(!count || count > 63);
2377
2378         *batch++ = MI_LOAD_REGISTER_IMM(count);
2379         do {
2380                 *batch++ = i915_mmio_reg_offset(lri->reg);
2381                 *batch++ = lri->value;
2382         } while (lri++, --count);
2383         *batch++ = MI_NOOP;
2384
2385         return batch;
2386 }
2387
2388 static u32 *gen9_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch)
2389 {
2390         static const struct lri lri[] = {
2391                 /* WaDisableGatherAtSetShaderCommonSlice:skl,bxt,kbl,glk */
2392                 {
2393                         COMMON_SLICE_CHICKEN2,
2394                         __MASKED_FIELD(GEN9_DISABLE_GATHER_AT_SET_SHADER_COMMON_SLICE,
2395                                        0),
2396                 },
2397
2398                 /* BSpec: 11391 */
2399                 {
2400                         FF_SLICE_CHICKEN,
2401                         __MASKED_FIELD(FF_SLICE_CHICKEN_CL_PROVOKING_VERTEX_FIX,
2402                                        FF_SLICE_CHICKEN_CL_PROVOKING_VERTEX_FIX),
2403                 },
2404
2405                 /* BSpec: 11299 */
2406                 {
2407                         _3D_CHICKEN3,
2408                         __MASKED_FIELD(_3D_CHICKEN_SF_PROVOKING_VERTEX_FIX,
2409                                        _3D_CHICKEN_SF_PROVOKING_VERTEX_FIX),
2410                 }
2411         };
2412
2413         *batch++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
2414
2415         /* WaFlushCoherentL3CacheLinesAtContextSwitch:skl,bxt,glk */
2416         batch = gen8_emit_flush_coherentl3_wa(engine, batch);
2417
2418         batch = emit_lri(batch, lri, ARRAY_SIZE(lri));
2419
2420         /* WaMediaPoolStateCmdInWABB:bxt,glk */
2421         if (HAS_POOLED_EU(engine->i915)) {
2422                 /*
2423                  * EU pool configuration is setup along with golden context
2424                  * during context initialization. This value depends on
2425                  * device type (2x6 or 3x6) and needs to be updated based
2426                  * on which subslice is disabled especially for 2x6
2427                  * devices, however it is safe to load default
2428                  * configuration of 3x6 device instead of masking off
2429                  * corresponding bits because HW ignores bits of a disabled
2430                  * subslice and drops down to appropriate config. Please
2431                  * see render_state_setup() in i915_gem_render_state.c for
2432                  * possible configurations, to avoid duplication they are
2433                  * not shown here again.
2434                  */
2435                 *batch++ = GEN9_MEDIA_POOL_STATE;
2436                 *batch++ = GEN9_MEDIA_POOL_ENABLE;
2437                 *batch++ = 0x00777000;
2438                 *batch++ = 0;
2439                 *batch++ = 0;
2440                 *batch++ = 0;
2441         }
2442
2443         *batch++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
2444
2445         /* Pad to end of cacheline */
2446         while ((unsigned long)batch % CACHELINE_BYTES)
2447                 *batch++ = MI_NOOP;
2448
2449         return batch;
2450 }
2451
2452 static u32 *
2453 gen10_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch)
2454 {
2455         int i;
2456
2457         /*
2458          * WaPipeControlBefore3DStateSamplePattern: cnl
2459          *
2460          * Ensure the engine is idle prior to programming a
2461          * 3DSTATE_SAMPLE_PATTERN during a context restore.
2462          */
2463         batch = gen8_emit_pipe_control(batch,
2464                                        PIPE_CONTROL_CS_STALL,
2465                                        0);
2466         /*
2467          * WaPipeControlBefore3DStateSamplePattern says we need 4 dwords for
2468          * the PIPE_CONTROL followed by 12 dwords of 0x0, so 16 dwords in
2469          * total. However, a PIPE_CONTROL is 6 dwords long, not 4, which is
2470          * confusing. Since gen8_emit_pipe_control() already advances the
2471          * batch by 6 dwords, we advance the other 10 here, completing a
2472          * cacheline. It's not clear if the workaround requires this padding
2473          * before other commands, or if it's just the regular padding we would
2474          * already have for the workaround bb, so leave it here for now.
2475          */
2476         for (i = 0; i < 10; i++)
2477                 *batch++ = MI_NOOP;
2478
2479         /* Pad to end of cacheline */
2480         while ((unsigned long)batch % CACHELINE_BYTES)
2481                 *batch++ = MI_NOOP;
2482
2483         return batch;
2484 }
2485
2486 #define CTX_WA_BB_OBJ_SIZE (PAGE_SIZE)
2487
2488 static int lrc_setup_wa_ctx(struct intel_engine_cs *engine)
2489 {
2490         struct drm_i915_gem_object *obj;
2491         struct i915_vma *vma;
2492         int err;
2493
2494         obj = i915_gem_object_create_shmem(engine->i915, CTX_WA_BB_OBJ_SIZE);
2495         if (IS_ERR(obj))
2496                 return PTR_ERR(obj);
2497
2498         vma = i915_vma_instance(obj, &engine->gt->ggtt->vm, NULL);
2499         if (IS_ERR(vma)) {
2500                 err = PTR_ERR(vma);
2501                 goto err;
2502         }
2503
2504         err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL | PIN_HIGH);
2505         if (err)
2506                 goto err;
2507
2508         engine->wa_ctx.vma = vma;
2509         return 0;
2510
2511 err:
2512         i915_gem_object_put(obj);
2513         return err;
2514 }
2515
2516 static void lrc_destroy_wa_ctx(struct intel_engine_cs *engine)
2517 {
2518         i915_vma_unpin_and_release(&engine->wa_ctx.vma, 0);
2519 }
2520
2521 typedef u32 *(*wa_bb_func_t)(struct intel_engine_cs *engine, u32 *batch);
2522
2523 static int intel_init_workaround_bb(struct intel_engine_cs *engine)
2524 {
2525         struct i915_ctx_workarounds *wa_ctx = &engine->wa_ctx;
2526         struct i915_wa_ctx_bb *wa_bb[2] = { &wa_ctx->indirect_ctx,
2527                                             &wa_ctx->per_ctx };
2528         wa_bb_func_t wa_bb_fn[2];
2529         struct page *page;
2530         void *batch, *batch_ptr;
2531         unsigned int i;
2532         int ret;
2533
2534         if (engine->class != RENDER_CLASS)
2535                 return 0;
2536
2537         switch (INTEL_GEN(engine->i915)) {
2538         case 12:
2539         case 11:
2540                 return 0;
2541         case 10:
2542                 wa_bb_fn[0] = gen10_init_indirectctx_bb;
2543                 wa_bb_fn[1] = NULL;
2544                 break;
2545         case 9:
2546                 wa_bb_fn[0] = gen9_init_indirectctx_bb;
2547                 wa_bb_fn[1] = NULL;
2548                 break;
2549         case 8:
2550                 wa_bb_fn[0] = gen8_init_indirectctx_bb;
2551                 wa_bb_fn[1] = NULL;
2552                 break;
2553         default:
2554                 MISSING_CASE(INTEL_GEN(engine->i915));
2555                 return 0;
2556         }
2557
2558         ret = lrc_setup_wa_ctx(engine);
2559         if (ret) {
2560                 DRM_DEBUG_DRIVER("Failed to setup context WA page: %d\n", ret);
2561                 return ret;
2562         }
2563
2564         page = i915_gem_object_get_dirty_page(wa_ctx->vma->obj, 0);
2565         batch = batch_ptr = kmap_atomic(page);
2566
2567         /*
2568          * Emit the two workaround batch buffers, recording the offset from the
2569          * start of the workaround batch buffer object for each and their
2570          * respective sizes.
2571          */
2572         for (i = 0; i < ARRAY_SIZE(wa_bb_fn); i++) {
2573                 wa_bb[i]->offset = batch_ptr - batch;
2574                 if (GEM_DEBUG_WARN_ON(!IS_ALIGNED(wa_bb[i]->offset,
2575                                                   CACHELINE_BYTES))) {
2576                         ret = -EINVAL;
2577                         break;
2578                 }
2579                 if (wa_bb_fn[i])
2580                         batch_ptr = wa_bb_fn[i](engine, batch_ptr);
2581                 wa_bb[i]->size = batch_ptr - (batch + wa_bb[i]->offset);
2582         }
2583
2584         BUG_ON(batch_ptr - batch > CTX_WA_BB_OBJ_SIZE);
2585
2586         kunmap_atomic(batch);
2587         if (ret)
2588                 lrc_destroy_wa_ctx(engine);
2589
2590         return ret;
2591 }
2592
2593 static void enable_execlists(struct intel_engine_cs *engine)
2594 {
2595         u32 mode;
2596
2597         assert_forcewakes_active(engine->uncore, FORCEWAKE_ALL);
2598
2599         intel_engine_set_hwsp_writemask(engine, ~0u); /* HWSTAM */
2600
2601         if (INTEL_GEN(engine->i915) >= 11)
2602                 mode = _MASKED_BIT_ENABLE(GEN11_GFX_DISABLE_LEGACY_MODE);
2603         else
2604                 mode = _MASKED_BIT_ENABLE(GFX_RUN_LIST_ENABLE);
2605         ENGINE_WRITE_FW(engine, RING_MODE_GEN7, mode);
2606
2607         ENGINE_WRITE_FW(engine, RING_MI_MODE, _MASKED_BIT_DISABLE(STOP_RING));
2608
2609         ENGINE_WRITE_FW(engine,
2610                         RING_HWS_PGA,
2611                         i915_ggtt_offset(engine->status_page.vma));
2612         ENGINE_POSTING_READ(engine, RING_HWS_PGA);
2613 }
2614
2615 static bool unexpected_starting_state(struct intel_engine_cs *engine)
2616 {
2617         bool unexpected = false;
2618
2619         if (ENGINE_READ_FW(engine, RING_MI_MODE) & STOP_RING) {
2620                 DRM_DEBUG_DRIVER("STOP_RING still set in RING_MI_MODE\n");
2621                 unexpected = true;
2622         }
2623
2624         return unexpected;
2625 }
2626
2627 static int execlists_resume(struct intel_engine_cs *engine)
2628 {
2629         intel_engine_apply_workarounds(engine);
2630         intel_engine_apply_whitelist(engine);
2631
2632         intel_mocs_init_engine(engine);
2633
2634         intel_engine_reset_breadcrumbs(engine);
2635
2636         if (GEM_SHOW_DEBUG() && unexpected_starting_state(engine)) {
2637                 struct drm_printer p = drm_debug_printer(__func__);
2638
2639                 intel_engine_dump(engine, &p, NULL);
2640         }
2641
2642         enable_execlists(engine);
2643
2644         return 0;
2645 }
2646
2647 static void execlists_reset_prepare(struct intel_engine_cs *engine)
2648 {
2649         struct intel_engine_execlists * const execlists = &engine->execlists;
2650         unsigned long flags;
2651
2652         GEM_TRACE("%s: depth<-%d\n", engine->name,
2653                   atomic_read(&execlists->tasklet.count));
2654
2655         /*
2656          * Prevent request submission to the hardware until we have
2657          * completed the reset in i915_gem_reset_finish(). If a request
2658          * is completed by one engine, it may then queue a request
2659          * to a second via its execlists->tasklet *just* as we are
2660          * calling engine->resume() and also writing the ELSP.
2661          * Turning off the execlists->tasklet until the reset is over
2662          * prevents the race.
2663          */
2664         __tasklet_disable_sync_once(&execlists->tasklet);
2665         GEM_BUG_ON(!reset_in_progress(execlists));
2666
2667         /* And flush any current direct submission. */
2668         spin_lock_irqsave(&engine->active.lock, flags);
2669         spin_unlock_irqrestore(&engine->active.lock, flags);
2670
2671         /*
2672          * We stop engines, otherwise we might get failed reset and a
2673          * dead gpu (on elk). Also as modern gpu as kbl can suffer
2674          * from system hang if batchbuffer is progressing when
2675          * the reset is issued, regardless of READY_TO_RESET ack.
2676          * Thus assume it is best to stop engines on all gens
2677          * where we have a gpu reset.
2678          *
2679          * WaKBLVECSSemaphoreWaitPoll:kbl (on ALL_ENGINES)
2680          *
2681          * FIXME: Wa for more modern gens needs to be validated
2682          */
2683         intel_engine_stop_cs(engine);
2684 }
2685
2686 static void reset_csb_pointers(struct intel_engine_cs *engine)
2687 {
2688         struct intel_engine_execlists * const execlists = &engine->execlists;
2689         const unsigned int reset_value = execlists->csb_size - 1;
2690
2691         ring_set_paused(engine, 0);
2692
2693         /*
2694          * After a reset, the HW starts writing into CSB entry [0]. We
2695          * therefore have to set our HEAD pointer back one entry so that
2696          * the *first* entry we check is entry 0. To complicate this further,
2697          * as we don't wait for the first interrupt after reset, we have to
2698          * fake the HW write to point back to the last entry so that our
2699          * inline comparison of our cached head position against the last HW
2700          * write works even before the first interrupt.
2701          */
2702         execlists->csb_head = reset_value;
2703         WRITE_ONCE(*execlists->csb_write, reset_value);
2704         wmb(); /* Make sure this is visible to HW (paranoia?) */
2705
2706         invalidate_csb_entries(&execlists->csb_status[0],
2707                                &execlists->csb_status[reset_value]);
2708 }
2709
2710 static struct i915_request *active_request(struct i915_request *rq)
2711 {
2712         const struct intel_context * const ce = rq->hw_context;
2713         struct i915_request *active = NULL;
2714         struct list_head *list;
2715
2716         if (!i915_request_is_active(rq)) /* unwound, but incomplete! */
2717                 return rq;
2718
2719         list = &i915_request_active_timeline(rq)->requests;
2720         list_for_each_entry_from_reverse(rq, list, link) {
2721                 if (i915_request_completed(rq))
2722                         break;
2723
2724                 if (rq->hw_context != ce)
2725                         break;
2726
2727                 active = rq;
2728         }
2729
2730         return active;
2731 }
2732
2733 static void __execlists_reset_reg_state(const struct intel_context *ce,
2734                                         const struct intel_engine_cs *engine)
2735 {
2736         u32 *regs = ce->lrc_reg_state;
2737
2738         if (INTEL_GEN(engine->i915) >= 9) {
2739                 regs[GEN9_CTX_RING_MI_MODE + 1] &= ~STOP_RING;
2740                 regs[GEN9_CTX_RING_MI_MODE + 1] |= STOP_RING << 16;
2741         }
2742 }
2743
2744 static void __execlists_reset(struct intel_engine_cs *engine, bool stalled)
2745 {
2746         struct intel_engine_execlists * const execlists = &engine->execlists;
2747         struct intel_context *ce;
2748         struct i915_request *rq;
2749         u32 *regs;
2750
2751         mb(); /* paranoia: read the CSB pointers from after the reset */
2752         clflush(execlists->csb_write);
2753         mb();
2754
2755         process_csb(engine); /* drain preemption events */
2756
2757         /* Following the reset, we need to reload the CSB read/write pointers */
2758         reset_csb_pointers(engine);
2759
2760         /*
2761          * Save the currently executing context, even if we completed
2762          * its request, it was still running at the time of the
2763          * reset and will have been clobbered.
2764          */
2765         rq = execlists_active(execlists);
2766         if (!rq)
2767                 goto unwind;
2768
2769         /* We still have requests in-flight; the engine should be active */
2770         GEM_BUG_ON(!intel_engine_pm_is_awake(engine));
2771
2772         ce = rq->hw_context;
2773         GEM_BUG_ON(!i915_vma_is_pinned(ce->state));
2774
2775         /* Proclaim we have exclusive access to the context image! */
2776         __context_pin_acquire(ce);
2777
2778         rq = active_request(rq);
2779         if (!rq) {
2780                 /* Idle context; tidy up the ring so we can restart afresh */
2781                 ce->ring->head = ce->ring->tail;
2782                 goto out_replay;
2783         }
2784
2785         /* Context has requests still in-flight; it should not be idle! */
2786         GEM_BUG_ON(i915_active_is_idle(&ce->active));
2787         ce->ring->head = intel_ring_wrap(ce->ring, rq->head);
2788
2789         /*
2790          * If this request hasn't started yet, e.g. it is waiting on a
2791          * semaphore, we need to avoid skipping the request or else we
2792          * break the signaling chain. However, if the context is corrupt
2793          * the request will not restart and we will be stuck with a wedged
2794          * device. It is quite often the case that if we issue a reset
2795          * while the GPU is loading the context image, that the context
2796          * image becomes corrupt.
2797          *
2798          * Otherwise, if we have not started yet, the request should replay
2799          * perfectly and we do not need to flag the result as being erroneous.
2800          */
2801         if (!i915_request_started(rq))
2802                 goto out_replay;
2803
2804         /*
2805          * If the request was innocent, we leave the request in the ELSP
2806          * and will try to replay it on restarting. The context image may
2807          * have been corrupted by the reset, in which case we may have
2808          * to service a new GPU hang, but more likely we can continue on
2809          * without impact.
2810          *
2811          * If the request was guilty, we presume the context is corrupt
2812          * and have to at least restore the RING register in the context
2813          * image back to the expected values to skip over the guilty request.
2814          */
2815         __i915_request_reset(rq, stalled);
2816         if (!stalled)
2817                 goto out_replay;
2818
2819         /*
2820          * We want a simple context + ring to execute the breadcrumb update.
2821          * We cannot rely on the context being intact across the GPU hang,
2822          * so clear it and rebuild just what we need for the breadcrumb.
2823          * All pending requests for this context will be zapped, and any
2824          * future request will be after userspace has had the opportunity
2825          * to recreate its own state.
2826          */
2827         GEM_BUG_ON(!intel_context_is_pinned(ce));
2828         regs = ce->lrc_reg_state;
2829         if (engine->pinned_default_state) {
2830                 memcpy(regs, /* skip restoring the vanilla PPHWSP */
2831                        engine->pinned_default_state + LRC_STATE_PN * PAGE_SIZE,
2832                        engine->context_size - PAGE_SIZE);
2833         }
2834         execlists_init_reg_state(regs, ce, engine, ce->ring, false);
2835
2836 out_replay:
2837         GEM_TRACE("%s replay {head:%04x, tail:%04x\n",
2838                   engine->name, ce->ring->head, ce->ring->tail);
2839         intel_ring_update_space(ce->ring);
2840         __execlists_reset_reg_state(ce, engine);
2841         __execlists_update_reg_state(ce, engine);
2842         ce->lrc_desc |= CTX_DESC_FORCE_RESTORE; /* paranoid: GPU was reset! */
2843         __context_pin_release(ce);
2844
2845 unwind:
2846         /* Push back any incomplete requests for replay after the reset. */
2847         cancel_port_requests(execlists);
2848         __unwind_incomplete_requests(engine);
2849 }
2850
2851 static void execlists_reset(struct intel_engine_cs *engine, bool stalled)
2852 {
2853         unsigned long flags;
2854
2855         GEM_TRACE("%s\n", engine->name);
2856
2857         spin_lock_irqsave(&engine->active.lock, flags);
2858
2859         __execlists_reset(engine, stalled);
2860
2861         spin_unlock_irqrestore(&engine->active.lock, flags);
2862 }
2863
2864 static void nop_submission_tasklet(unsigned long data)
2865 {
2866         /* The driver is wedged; don't process any more events. */
2867 }
2868
2869 static void execlists_cancel_requests(struct intel_engine_cs *engine)
2870 {
2871         struct intel_engine_execlists * const execlists = &engine->execlists;
2872         struct i915_request *rq, *rn;
2873         struct rb_node *rb;
2874         unsigned long flags;
2875
2876         GEM_TRACE("%s\n", engine->name);
2877
2878         /*
2879          * Before we call engine->cancel_requests(), we should have exclusive
2880          * access to the submission state. This is arranged for us by the
2881          * caller disabling the interrupt generation, the tasklet and other
2882          * threads that may then access the same state, giving us a free hand
2883          * to reset state. However, we still need to let lockdep be aware that
2884          * we know this state may be accessed in hardirq context, so we
2885          * disable the irq around this manipulation and we want to keep
2886          * the spinlock focused on its duties and not accidentally conflate
2887          * coverage to the submission's irq state. (Similarly, although we
2888          * shouldn't need to disable irq around the manipulation of the
2889          * submission's irq state, we also wish to remind ourselves that
2890          * it is irq state.)
2891          */
2892         spin_lock_irqsave(&engine->active.lock, flags);
2893
2894         __execlists_reset(engine, true);
2895
2896         /* Mark all executing requests as skipped. */
2897         list_for_each_entry(rq, &engine->active.requests, sched.link)
2898                 mark_eio(rq);
2899
2900         /* Flush the queued requests to the timeline list (for retiring). */
2901         while ((rb = rb_first_cached(&execlists->queue))) {
2902                 struct i915_priolist *p = to_priolist(rb);
2903                 int i;
2904
2905                 priolist_for_each_request_consume(rq, rn, p, i) {
2906                         mark_eio(rq);
2907                         __i915_request_submit(rq);
2908                 }
2909
2910                 rb_erase_cached(&p->node, &execlists->queue);
2911                 i915_priolist_free(p);
2912         }
2913
2914         /* Cancel all attached virtual engines */
2915         while ((rb = rb_first_cached(&execlists->virtual))) {
2916                 struct virtual_engine *ve =
2917                         rb_entry(rb, typeof(*ve), nodes[engine->id].rb);
2918
2919                 rb_erase_cached(rb, &execlists->virtual);
2920                 RB_CLEAR_NODE(rb);
2921
2922                 spin_lock(&ve->base.active.lock);
2923                 rq = fetch_and_zero(&ve->request);
2924                 if (rq) {
2925                         mark_eio(rq);
2926
2927                         rq->engine = engine;
2928                         __i915_request_submit(rq);
2929                         i915_request_put(rq);
2930
2931                         ve->base.execlists.queue_priority_hint = INT_MIN;
2932                 }
2933                 spin_unlock(&ve->base.active.lock);
2934         }
2935
2936         /* Remaining _unready_ requests will be nop'ed when submitted */
2937
2938         execlists->queue_priority_hint = INT_MIN;
2939         execlists->queue = RB_ROOT_CACHED;
2940
2941         GEM_BUG_ON(__tasklet_is_enabled(&execlists->tasklet));
2942         execlists->tasklet.func = nop_submission_tasklet;
2943
2944         spin_unlock_irqrestore(&engine->active.lock, flags);
2945 }
2946
2947 static void execlists_reset_finish(struct intel_engine_cs *engine)
2948 {
2949         struct intel_engine_execlists * const execlists = &engine->execlists;
2950
2951         /*
2952          * After a GPU reset, we may have requests to replay. Do so now while
2953          * we still have the forcewake to be sure that the GPU is not allowed
2954          * to sleep before we restart and reload a context.
2955          */
2956         GEM_BUG_ON(!reset_in_progress(execlists));
2957         if (!RB_EMPTY_ROOT(&execlists->queue.rb_root))
2958                 execlists->tasklet.func(execlists->tasklet.data);
2959
2960         if (__tasklet_enable(&execlists->tasklet))
2961                 /* And kick in case we missed a new request submission. */
2962                 tasklet_hi_schedule(&execlists->tasklet);
2963         GEM_TRACE("%s: depth->%d\n", engine->name,
2964                   atomic_read(&execlists->tasklet.count));
2965 }
2966
2967 static int gen8_emit_bb_start(struct i915_request *rq,
2968                               u64 offset, u32 len,
2969                               const unsigned int flags)
2970 {
2971         u32 *cs;
2972
2973         cs = intel_ring_begin(rq, 4);
2974         if (IS_ERR(cs))
2975                 return PTR_ERR(cs);
2976
2977         /*
2978          * WaDisableCtxRestoreArbitration:bdw,chv
2979          *
2980          * We don't need to perform MI_ARB_ENABLE as often as we do (in
2981          * particular all the gen that do not need the w/a at all!), if we
2982          * took care to make sure that on every switch into this context
2983          * (both ordinary and for preemption) that arbitrartion was enabled
2984          * we would be fine.  However, for gen8 there is another w/a that
2985          * requires us to not preempt inside GPGPU execution, so we keep
2986          * arbitration disabled for gen8 batches. Arbitration will be
2987          * re-enabled before we close the request
2988          * (engine->emit_fini_breadcrumb).
2989          */
2990         *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
2991
2992         /* FIXME(BDW+): Address space and security selectors. */
2993         *cs++ = MI_BATCH_BUFFER_START_GEN8 |
2994                 (flags & I915_DISPATCH_SECURE ? 0 : BIT(8));
2995         *cs++ = lower_32_bits(offset);
2996         *cs++ = upper_32_bits(offset);
2997
2998         intel_ring_advance(rq, cs);
2999
3000         return 0;
3001 }
3002
3003 static int gen9_emit_bb_start(struct i915_request *rq,
3004                               u64 offset, u32 len,
3005                               const unsigned int flags)
3006 {
3007         u32 *cs;
3008
3009         cs = intel_ring_begin(rq, 6);
3010         if (IS_ERR(cs))
3011                 return PTR_ERR(cs);
3012
3013         *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
3014
3015         *cs++ = MI_BATCH_BUFFER_START_GEN8 |
3016                 (flags & I915_DISPATCH_SECURE ? 0 : BIT(8));
3017         *cs++ = lower_32_bits(offset);
3018         *cs++ = upper_32_bits(offset);
3019
3020         *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
3021         *cs++ = MI_NOOP;
3022
3023         intel_ring_advance(rq, cs);
3024
3025         return 0;
3026 }
3027
3028 static void gen8_logical_ring_enable_irq(struct intel_engine_cs *engine)
3029 {
3030         ENGINE_WRITE(engine, RING_IMR,
3031                      ~(engine->irq_enable_mask | engine->irq_keep_mask));
3032         ENGINE_POSTING_READ(engine, RING_IMR);
3033 }
3034
3035 static void gen8_logical_ring_disable_irq(struct intel_engine_cs *engine)
3036 {
3037         ENGINE_WRITE(engine, RING_IMR, ~engine->irq_keep_mask);
3038 }
3039
3040 static int gen8_emit_flush(struct i915_request *request, u32 mode)
3041 {
3042         u32 cmd, *cs;
3043
3044         cs = intel_ring_begin(request, 4);
3045         if (IS_ERR(cs))
3046                 return PTR_ERR(cs);
3047
3048         cmd = MI_FLUSH_DW + 1;
3049
3050         /* We always require a command barrier so that subsequent
3051          * commands, such as breadcrumb interrupts, are strictly ordered
3052          * wrt the contents of the write cache being flushed to memory
3053          * (and thus being coherent from the CPU).
3054          */
3055         cmd |= MI_FLUSH_DW_STORE_INDEX | MI_FLUSH_DW_OP_STOREDW;
3056
3057         if (mode & EMIT_INVALIDATE) {
3058                 cmd |= MI_INVALIDATE_TLB;
3059                 if (request->engine->class == VIDEO_DECODE_CLASS)
3060                         cmd |= MI_INVALIDATE_BSD;
3061         }
3062
3063         *cs++ = cmd;
3064         *cs++ = LRC_PPHWSP_SCRATCH_ADDR;
3065         *cs++ = 0; /* upper addr */
3066         *cs++ = 0; /* value */
3067         intel_ring_advance(request, cs);
3068
3069         return 0;
3070 }
3071
3072 static int gen8_emit_flush_render(struct i915_request *request,
3073                                   u32 mode)
3074 {
3075         bool vf_flush_wa = false, dc_flush_wa = false;
3076         u32 *cs, flags = 0;
3077         int len;
3078
3079         flags |= PIPE_CONTROL_CS_STALL;
3080
3081         if (mode & EMIT_FLUSH) {
3082                 flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
3083                 flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
3084                 flags |= PIPE_CONTROL_DC_FLUSH_ENABLE;
3085                 flags |= PIPE_CONTROL_FLUSH_ENABLE;
3086         }
3087
3088         if (mode & EMIT_INVALIDATE) {
3089                 flags |= PIPE_CONTROL_TLB_INVALIDATE;
3090                 flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
3091                 flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
3092                 flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
3093                 flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
3094                 flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
3095                 flags |= PIPE_CONTROL_QW_WRITE;
3096                 flags |= PIPE_CONTROL_STORE_DATA_INDEX;
3097
3098                 /*
3099                  * On GEN9: before VF_CACHE_INVALIDATE we need to emit a NULL
3100                  * pipe control.
3101                  */
3102                 if (IS_GEN(request->i915, 9))
3103                         vf_flush_wa = true;
3104
3105                 /* WaForGAMHang:kbl */
3106                 if (IS_KBL_REVID(request->i915, 0, KBL_REVID_B0))
3107                         dc_flush_wa = true;
3108         }
3109
3110         len = 6;
3111
3112         if (vf_flush_wa)
3113                 len += 6;
3114
3115         if (dc_flush_wa)
3116                 len += 12;
3117
3118         cs = intel_ring_begin(request, len);
3119         if (IS_ERR(cs))
3120                 return PTR_ERR(cs);
3121
3122         if (vf_flush_wa)
3123                 cs = gen8_emit_pipe_control(cs, 0, 0);
3124
3125         if (dc_flush_wa)
3126                 cs = gen8_emit_pipe_control(cs, PIPE_CONTROL_DC_FLUSH_ENABLE,
3127                                             0);
3128
3129         cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR);
3130
3131         if (dc_flush_wa)
3132                 cs = gen8_emit_pipe_control(cs, PIPE_CONTROL_CS_STALL, 0);
3133
3134         intel_ring_advance(request, cs);
3135
3136         return 0;
3137 }
3138
3139 static int gen11_emit_flush_render(struct i915_request *request,
3140                                    u32 mode)
3141 {
3142         if (mode & EMIT_FLUSH) {
3143                 u32 *cs;
3144                 u32 flags = 0;
3145
3146                 flags |= PIPE_CONTROL_CS_STALL;
3147
3148                 flags |= PIPE_CONTROL_TILE_CACHE_FLUSH;
3149                 flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
3150                 flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
3151                 flags |= PIPE_CONTROL_DC_FLUSH_ENABLE;
3152                 flags |= PIPE_CONTROL_FLUSH_ENABLE;
3153                 flags |= PIPE_CONTROL_QW_WRITE;
3154                 flags |= PIPE_CONTROL_STORE_DATA_INDEX;
3155
3156                 cs = intel_ring_begin(request, 6);
3157                 if (IS_ERR(cs))
3158                         return PTR_ERR(cs);
3159
3160                 cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR);
3161                 intel_ring_advance(request, cs);
3162         }
3163
3164         if (mode & EMIT_INVALIDATE) {
3165                 u32 *cs;
3166                 u32 flags = 0;
3167
3168                 flags |= PIPE_CONTROL_CS_STALL;
3169
3170                 flags |= PIPE_CONTROL_COMMAND_CACHE_INVALIDATE;
3171                 flags |= PIPE_CONTROL_TLB_INVALIDATE;
3172                 flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
3173                 flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
3174                 flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
3175                 flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
3176                 flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
3177                 flags |= PIPE_CONTROL_QW_WRITE;
3178                 flags |= PIPE_CONTROL_STORE_DATA_INDEX;
3179
3180                 cs = intel_ring_begin(request, 6);
3181                 if (IS_ERR(cs))
3182                         return PTR_ERR(cs);
3183
3184                 cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR);
3185                 intel_ring_advance(request, cs);
3186         }
3187
3188         return 0;
3189 }
3190
3191 static u32 preparser_disable(bool state)
3192 {
3193         return MI_ARB_CHECK | 1 << 8 | state;
3194 }
3195
3196 static int gen12_emit_flush_render(struct i915_request *request,
3197                                    u32 mode)
3198 {
3199         if (mode & EMIT_FLUSH) {
3200                 u32 flags = 0;
3201                 u32 *cs;
3202
3203                 flags |= PIPE_CONTROL_TILE_CACHE_FLUSH;
3204                 flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
3205                 flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
3206                 flags |= PIPE_CONTROL_DC_FLUSH_ENABLE;
3207                 flags |= PIPE_CONTROL_FLUSH_ENABLE;
3208
3209                 flags |= PIPE_CONTROL_STORE_DATA_INDEX;
3210                 flags |= PIPE_CONTROL_QW_WRITE;
3211
3212                 flags |= PIPE_CONTROL_CS_STALL;
3213
3214                 cs = intel_ring_begin(request, 6);
3215                 if (IS_ERR(cs))
3216                         return PTR_ERR(cs);
3217
3218                 cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR);
3219                 intel_ring_advance(request, cs);
3220         }
3221
3222         if (mode & EMIT_INVALIDATE) {
3223                 u32 flags = 0;
3224                 u32 *cs;
3225
3226                 flags |= PIPE_CONTROL_COMMAND_CACHE_INVALIDATE;
3227                 flags |= PIPE_CONTROL_TLB_INVALIDATE;
3228                 flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
3229                 flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
3230                 flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
3231                 flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
3232                 flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
3233
3234                 flags |= PIPE_CONTROL_STORE_DATA_INDEX;
3235                 flags |= PIPE_CONTROL_QW_WRITE;
3236
3237                 flags |= PIPE_CONTROL_CS_STALL;
3238
3239                 cs = intel_ring_begin(request, 8);
3240                 if (IS_ERR(cs))
3241                         return PTR_ERR(cs);
3242
3243                 /*
3244                  * Prevent the pre-parser from skipping past the TLB
3245                  * invalidate and loading a stale page for the batch
3246                  * buffer / request payload.
3247                  */
3248                 *cs++ = preparser_disable(true);
3249
3250                 cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR);
3251
3252                 *cs++ = preparser_disable(false);
3253                 intel_ring_advance(request, cs);
3254         }
3255
3256         return 0;
3257 }
3258
3259 /*
3260  * Reserve space for 2 NOOPs at the end of each request to be
3261  * used as a workaround for not being allowed to do lite
3262  * restore with HEAD==TAIL (WaIdleLiteRestore).
3263  */
3264 static u32 *gen8_emit_wa_tail(struct i915_request *request, u32 *cs)
3265 {
3266         /* Ensure there's always at least one preemption point per-request. */
3267         *cs++ = MI_ARB_CHECK;
3268         *cs++ = MI_NOOP;
3269         request->wa_tail = intel_ring_offset(request, cs);
3270
3271         return cs;
3272 }
3273
3274 static u32 *emit_preempt_busywait(struct i915_request *request, u32 *cs)
3275 {
3276         *cs++ = MI_SEMAPHORE_WAIT |
3277                 MI_SEMAPHORE_GLOBAL_GTT |
3278                 MI_SEMAPHORE_POLL |
3279                 MI_SEMAPHORE_SAD_EQ_SDD;
3280         *cs++ = 0;
3281         *cs++ = intel_hws_preempt_address(request->engine);
3282         *cs++ = 0;
3283
3284         return cs;
3285 }
3286
3287 static __always_inline u32*
3288 gen8_emit_fini_breadcrumb_footer(struct i915_request *request,
3289                                  u32 *cs)
3290 {
3291         *cs++ = MI_USER_INTERRUPT;
3292
3293         *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
3294         if (intel_engine_has_semaphores(request->engine))
3295                 cs = emit_preempt_busywait(request, cs);
3296
3297         request->tail = intel_ring_offset(request, cs);
3298         assert_ring_tail_valid(request->ring, request->tail);
3299
3300         return gen8_emit_wa_tail(request, cs);
3301 }
3302
3303 static u32 *gen8_emit_fini_breadcrumb(struct i915_request *request, u32 *cs)
3304 {
3305         cs = gen8_emit_ggtt_write(cs,
3306                                   request->fence.seqno,
3307                                   i915_request_active_timeline(request)->hwsp_offset,
3308                                   0);
3309
3310         return gen8_emit_fini_breadcrumb_footer(request, cs);
3311 }
3312
3313 static u32 *gen8_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs)
3314 {
3315         cs = gen8_emit_pipe_control(cs,
3316                                     PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
3317                                     PIPE_CONTROL_DEPTH_CACHE_FLUSH |
3318                                     PIPE_CONTROL_DC_FLUSH_ENABLE,
3319                                     0);
3320
3321         /* XXX flush+write+CS_STALL all in one upsets gem_concurrent_blt:kbl */
3322         cs = gen8_emit_ggtt_write_rcs(cs,
3323                                       request->fence.seqno,
3324                                       i915_request_active_timeline(request)->hwsp_offset,
3325                                       PIPE_CONTROL_FLUSH_ENABLE |
3326                                       PIPE_CONTROL_CS_STALL);
3327
3328         return gen8_emit_fini_breadcrumb_footer(request, cs);
3329 }
3330
3331 static u32 *
3332 gen11_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs)
3333 {
3334         cs = gen8_emit_ggtt_write_rcs(cs,
3335                                       request->fence.seqno,
3336                                       i915_request_active_timeline(request)->hwsp_offset,
3337                                       PIPE_CONTROL_CS_STALL |
3338                                       PIPE_CONTROL_TILE_CACHE_FLUSH |
3339                                       PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
3340                                       PIPE_CONTROL_DEPTH_CACHE_FLUSH |
3341                                       PIPE_CONTROL_DC_FLUSH_ENABLE |
3342                                       PIPE_CONTROL_FLUSH_ENABLE);
3343
3344         return gen8_emit_fini_breadcrumb_footer(request, cs);
3345 }
3346
3347 /*
3348  * Note that the CS instruction pre-parser will not stall on the breadcrumb
3349  * flush and will continue pre-fetching the instructions after it before the
3350  * memory sync is completed. On pre-gen12 HW, the pre-parser will stop at
3351  * BB_START/END instructions, so, even though we might pre-fetch the pre-amble
3352  * of the next request before the memory has been flushed, we're guaranteed that
3353  * we won't access the batch itself too early.
3354  * However, on gen12+ the parser can pre-fetch across the BB_START/END commands,
3355  * so, if the current request is modifying an instruction in the next request on
3356  * the same intel_context, we might pre-fetch and then execute the pre-update
3357  * instruction. To avoid this, the users of self-modifying code should either
3358  * disable the parser around the code emitting the memory writes, via a new flag
3359  * added to MI_ARB_CHECK, or emit the writes from a different intel_context. For
3360  * the in-kernel use-cases we've opted to use a separate context, see
3361  * reloc_gpu() as an example.
3362  * All the above applies only to the instructions themselves. Non-inline data
3363  * used by the instructions is not pre-fetched.
3364  */
3365
3366 static u32 *gen12_emit_preempt_busywait(struct i915_request *request, u32 *cs)
3367 {
3368         *cs++ = MI_SEMAPHORE_WAIT_TOKEN |
3369                 MI_SEMAPHORE_GLOBAL_GTT |
3370                 MI_SEMAPHORE_POLL |
3371                 MI_SEMAPHORE_SAD_EQ_SDD;
3372         *cs++ = 0;
3373         *cs++ = intel_hws_preempt_address(request->engine);
3374         *cs++ = 0;
3375         *cs++ = 0;
3376         *cs++ = MI_NOOP;
3377
3378         return cs;
3379 }
3380
3381 static __always_inline u32*
3382 gen12_emit_fini_breadcrumb_footer(struct i915_request *request, u32 *cs)
3383 {
3384         *cs++ = MI_USER_INTERRUPT;
3385
3386         *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
3387         if (intel_engine_has_semaphores(request->engine))
3388                 cs = gen12_emit_preempt_busywait(request, cs);
3389
3390         request->tail = intel_ring_offset(request, cs);
3391         assert_ring_tail_valid(request->ring, request->tail);
3392
3393         return gen8_emit_wa_tail(request, cs);
3394 }
3395
3396 static u32 *gen12_emit_fini_breadcrumb(struct i915_request *request, u32 *cs)
3397 {
3398         cs = gen8_emit_ggtt_write(cs,
3399                                   request->fence.seqno,
3400                                   i915_request_active_timeline(request)->hwsp_offset,
3401                                   0);
3402
3403         return gen12_emit_fini_breadcrumb_footer(request, cs);
3404 }
3405
3406 static u32 *
3407 gen12_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs)
3408 {
3409         cs = gen8_emit_ggtt_write_rcs(cs,
3410                                       request->fence.seqno,
3411                                       i915_request_active_timeline(request)->hwsp_offset,
3412                                       PIPE_CONTROL_CS_STALL |
3413                                       PIPE_CONTROL_TILE_CACHE_FLUSH |
3414                                       PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
3415                                       PIPE_CONTROL_DEPTH_CACHE_FLUSH |
3416                                       PIPE_CONTROL_DC_FLUSH_ENABLE |
3417                                       PIPE_CONTROL_FLUSH_ENABLE);
3418
3419         return gen12_emit_fini_breadcrumb_footer(request, cs);
3420 }
3421
3422 static void execlists_park(struct intel_engine_cs *engine)
3423 {
3424         del_timer(&engine->execlists.timer);
3425 }
3426
3427 void intel_execlists_set_default_submission(struct intel_engine_cs *engine)
3428 {
3429         engine->submit_request = execlists_submit_request;
3430         engine->cancel_requests = execlists_cancel_requests;
3431         engine->schedule = i915_schedule;
3432         engine->execlists.tasklet.func = execlists_submission_tasklet;
3433
3434         engine->reset.prepare = execlists_reset_prepare;
3435         engine->reset.reset = execlists_reset;
3436         engine->reset.finish = execlists_reset_finish;
3437
3438         engine->park = execlists_park;
3439         engine->unpark = NULL;
3440
3441         engine->flags |= I915_ENGINE_SUPPORTS_STATS;
3442         if (!intel_vgpu_active(engine->i915)) {
3443                 engine->flags |= I915_ENGINE_HAS_SEMAPHORES;
3444                 if (HAS_LOGICAL_RING_PREEMPTION(engine->i915))
3445                         engine->flags |= I915_ENGINE_HAS_PREEMPTION;
3446         }
3447
3448         if (INTEL_GEN(engine->i915) >= 12)
3449                 engine->flags |= I915_ENGINE_HAS_RELATIVE_MMIO;
3450 }
3451
3452 static void execlists_destroy(struct intel_engine_cs *engine)
3453 {
3454         intel_engine_cleanup_common(engine);
3455         lrc_destroy_wa_ctx(engine);
3456         kfree(engine);
3457 }
3458
3459 static void
3460 logical_ring_default_vfuncs(struct intel_engine_cs *engine)
3461 {
3462         /* Default vfuncs which can be overriden by each engine. */
3463
3464         engine->destroy = execlists_destroy;
3465         engine->resume = execlists_resume;
3466
3467         engine->reset.prepare = execlists_reset_prepare;
3468         engine->reset.reset = execlists_reset;
3469         engine->reset.finish = execlists_reset_finish;
3470
3471         engine->cops = &execlists_context_ops;
3472         engine->request_alloc = execlists_request_alloc;
3473
3474         engine->emit_flush = gen8_emit_flush;
3475         engine->emit_init_breadcrumb = gen8_emit_init_breadcrumb;
3476         engine->emit_fini_breadcrumb = gen8_emit_fini_breadcrumb;
3477         if (INTEL_GEN(engine->i915) >= 12)
3478                 engine->emit_fini_breadcrumb = gen12_emit_fini_breadcrumb;
3479
3480         engine->set_default_submission = intel_execlists_set_default_submission;
3481
3482         if (INTEL_GEN(engine->i915) < 11) {
3483                 engine->irq_enable = gen8_logical_ring_enable_irq;
3484                 engine->irq_disable = gen8_logical_ring_disable_irq;
3485         } else {
3486                 /*
3487                  * TODO: On Gen11 interrupt masks need to be clear
3488                  * to allow C6 entry. Keep interrupts enabled at
3489                  * and take the hit of generating extra interrupts
3490                  * until a more refined solution exists.
3491                  */
3492         }
3493         if (IS_GEN(engine->i915, 8))
3494                 engine->emit_bb_start = gen8_emit_bb_start;
3495         else
3496                 engine->emit_bb_start = gen9_emit_bb_start;
3497 }
3498
3499 static inline void
3500 logical_ring_default_irqs(struct intel_engine_cs *engine)
3501 {
3502         unsigned int shift = 0;
3503
3504         if (INTEL_GEN(engine->i915) < 11) {
3505                 const u8 irq_shifts[] = {
3506                         [RCS0]  = GEN8_RCS_IRQ_SHIFT,
3507                         [BCS0]  = GEN8_BCS_IRQ_SHIFT,
3508                         [VCS0]  = GEN8_VCS0_IRQ_SHIFT,
3509                         [VCS1]  = GEN8_VCS1_IRQ_SHIFT,
3510                         [VECS0] = GEN8_VECS_IRQ_SHIFT,
3511                 };
3512
3513                 shift = irq_shifts[engine->id];
3514         }
3515
3516         engine->irq_enable_mask = GT_RENDER_USER_INTERRUPT << shift;
3517         engine->irq_keep_mask = GT_CONTEXT_SWITCH_INTERRUPT << shift;
3518 }
3519
3520 static void rcs_submission_override(struct intel_engine_cs *engine)
3521 {
3522         switch (INTEL_GEN(engine->i915)) {
3523         case 12:
3524                 engine->emit_flush = gen12_emit_flush_render;
3525                 engine->emit_fini_breadcrumb = gen12_emit_fini_breadcrumb_rcs;
3526                 break;
3527         case 11:
3528                 engine->emit_flush = gen11_emit_flush_render;
3529                 engine->emit_fini_breadcrumb = gen11_emit_fini_breadcrumb_rcs;
3530                 break;
3531         default:
3532                 engine->emit_flush = gen8_emit_flush_render;
3533                 engine->emit_fini_breadcrumb = gen8_emit_fini_breadcrumb_rcs;
3534                 break;
3535         }
3536 }
3537
3538 int intel_execlists_submission_setup(struct intel_engine_cs *engine)
3539 {
3540         tasklet_init(&engine->execlists.tasklet,
3541                      execlists_submission_tasklet, (unsigned long)engine);
3542         timer_setup(&engine->execlists.timer, execlists_submission_timer, 0);
3543
3544         logical_ring_default_vfuncs(engine);
3545         logical_ring_default_irqs(engine);
3546
3547         if (engine->class == RENDER_CLASS)
3548                 rcs_submission_override(engine);
3549
3550         return 0;
3551 }
3552
3553 int intel_execlists_submission_init(struct intel_engine_cs *engine)
3554 {
3555         struct intel_engine_execlists * const execlists = &engine->execlists;
3556         struct drm_i915_private *i915 = engine->i915;
3557         struct intel_uncore *uncore = engine->uncore;
3558         u32 base = engine->mmio_base;
3559         int ret;
3560
3561         ret = intel_engine_init_common(engine);
3562         if (ret)
3563                 return ret;
3564
3565         if (intel_init_workaround_bb(engine))
3566                 /*
3567                  * We continue even if we fail to initialize WA batch
3568                  * because we only expect rare glitches but nothing
3569                  * critical to prevent us from using GPU
3570                  */
3571                 DRM_ERROR("WA batch buffer initialization failed\n");
3572
3573         if (HAS_LOGICAL_RING_ELSQ(i915)) {
3574                 execlists->submit_reg = uncore->regs +
3575                         i915_mmio_reg_offset(RING_EXECLIST_SQ_CONTENTS(base));
3576                 execlists->ctrl_reg = uncore->regs +
3577                         i915_mmio_reg_offset(RING_EXECLIST_CONTROL(base));
3578         } else {
3579                 execlists->submit_reg = uncore->regs +
3580                         i915_mmio_reg_offset(RING_ELSP(base));
3581         }
3582
3583         execlists->csb_status =
3584                 &engine->status_page.addr[I915_HWS_CSB_BUF0_INDEX];
3585
3586         execlists->csb_write =
3587                 &engine->status_page.addr[intel_hws_csb_write_index(i915)];
3588
3589         if (INTEL_GEN(i915) < 11)
3590                 execlists->csb_size = GEN8_CSB_ENTRIES;
3591         else
3592                 execlists->csb_size = GEN11_CSB_ENTRIES;
3593
3594         reset_csb_pointers(engine);
3595
3596         return 0;
3597 }
3598
3599 static u32 intel_lr_indirect_ctx_offset(const struct intel_engine_cs *engine)
3600 {
3601         u32 indirect_ctx_offset;
3602
3603         switch (INTEL_GEN(engine->i915)) {
3604         default:
3605                 MISSING_CASE(INTEL_GEN(engine->i915));
3606                 /* fall through */
3607         case 12:
3608                 indirect_ctx_offset =
3609                         GEN12_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
3610                 break;
3611         case 11:
3612                 indirect_ctx_offset =
3613                         GEN11_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
3614                 break;
3615         case 10:
3616                 indirect_ctx_offset =
3617                         GEN10_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
3618                 break;
3619         case 9:
3620                 indirect_ctx_offset =
3621                         GEN9_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
3622                 break;
3623         case 8:
3624                 indirect_ctx_offset =
3625                         GEN8_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
3626                 break;
3627         }
3628
3629         return indirect_ctx_offset;
3630 }
3631
3632
3633 static void init_common_reg_state(u32 * const regs,
3634                                   const struct intel_engine_cs *engine,
3635                                   const struct intel_ring *ring)
3636 {
3637         regs[CTX_CONTEXT_CONTROL] =
3638                 _MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT) |
3639                 _MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH);
3640         if (INTEL_GEN(engine->i915) < 11)
3641                 regs[CTX_CONTEXT_CONTROL] |=
3642                         _MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT |
3643                                             CTX_CTRL_RS_CTX_ENABLE);
3644
3645         regs[CTX_RING_BUFFER_CONTROL] = RING_CTL_SIZE(ring->size) | RING_VALID;
3646         regs[CTX_BB_STATE] = RING_BB_PPGTT;
3647 }
3648
3649 static void init_wa_bb_reg_state(u32 * const regs,
3650                                  const struct intel_engine_cs *engine,
3651                                  u32 pos_bb_per_ctx)
3652 {
3653         const struct i915_ctx_workarounds * const wa_ctx = &engine->wa_ctx;
3654
3655         if (wa_ctx->per_ctx.size) {
3656                 const u32 ggtt_offset = i915_ggtt_offset(wa_ctx->vma);
3657
3658                 regs[pos_bb_per_ctx] =
3659                         (ggtt_offset + wa_ctx->per_ctx.offset) | 0x01;
3660         }
3661
3662         if (wa_ctx->indirect_ctx.size) {
3663                 const u32 ggtt_offset = i915_ggtt_offset(wa_ctx->vma);
3664
3665                 regs[pos_bb_per_ctx + 2] =
3666                         (ggtt_offset + wa_ctx->indirect_ctx.offset) |
3667                         (wa_ctx->indirect_ctx.size / CACHELINE_BYTES);
3668
3669                 regs[pos_bb_per_ctx + 4] =
3670                         intel_lr_indirect_ctx_offset(engine) << 6;
3671         }
3672 }
3673
3674 static void init_ppgtt_reg_state(u32 *regs, const struct i915_ppgtt *ppgtt)
3675 {
3676         if (i915_vm_is_4lvl(&ppgtt->vm)) {
3677                 /* 64b PPGTT (48bit canonical)
3678                  * PDP0_DESCRIPTOR contains the base address to PML4 and
3679                  * other PDP Descriptors are ignored.
3680                  */
3681                 ASSIGN_CTX_PML4(ppgtt, regs);
3682         } else {
3683                 ASSIGN_CTX_PDP(ppgtt, regs, 3);
3684                 ASSIGN_CTX_PDP(ppgtt, regs, 2);
3685                 ASSIGN_CTX_PDP(ppgtt, regs, 1);
3686                 ASSIGN_CTX_PDP(ppgtt, regs, 0);
3687         }
3688 }
3689
3690 static struct i915_ppgtt *vm_alias(struct i915_address_space *vm)
3691 {
3692         if (i915_is_ggtt(vm))
3693                 return i915_vm_to_ggtt(vm)->alias;
3694         else
3695                 return i915_vm_to_ppgtt(vm);
3696 }
3697
3698 static void execlists_init_reg_state(u32 *regs,
3699                                      const struct intel_context *ce,
3700                                      const struct intel_engine_cs *engine,
3701                                      const struct intel_ring *ring,
3702                                      bool close)
3703 {
3704         /*
3705          * A context is actually a big batch buffer with several
3706          * MI_LOAD_REGISTER_IMM commands followed by (reg, value) pairs. The
3707          * values we are setting here are only for the first context restore:
3708          * on a subsequent save, the GPU will recreate this batchbuffer with new
3709          * values (including all the missing MI_LOAD_REGISTER_IMM commands that
3710          * we are not initializing here).
3711          *
3712          * Must keep consistent with virtual_update_register_offsets().
3713          */
3714         u32 *bbe = set_offsets(regs, reg_offsets(engine), engine);
3715
3716         if (close) { /* Close the batch; used mainly by live_lrc_layout() */
3717                 *bbe = MI_BATCH_BUFFER_END;
3718                 if (INTEL_GEN(engine->i915) >= 10)
3719                         *bbe |= BIT(0);
3720         }
3721
3722         init_common_reg_state(regs, engine, ring);
3723         init_ppgtt_reg_state(regs, vm_alias(ce->vm));
3724
3725         init_wa_bb_reg_state(regs, engine,
3726                              INTEL_GEN(engine->i915) >= 12 ?
3727                              GEN12_CTX_BB_PER_CTX_PTR :
3728                              CTX_BB_PER_CTX_PTR);
3729 }
3730
3731 static int
3732 populate_lr_context(struct intel_context *ce,
3733                     struct drm_i915_gem_object *ctx_obj,
3734                     struct intel_engine_cs *engine,
3735                     struct intel_ring *ring)
3736 {
3737         bool inhibit = true;
3738         void *vaddr;
3739         u32 *regs;
3740         int ret;
3741
3742         vaddr = i915_gem_object_pin_map(ctx_obj, I915_MAP_WB);
3743         if (IS_ERR(vaddr)) {
3744                 ret = PTR_ERR(vaddr);
3745                 DRM_DEBUG_DRIVER("Could not map object pages! (%d)\n", ret);
3746                 return ret;
3747         }
3748
3749         set_redzone(vaddr, engine);
3750
3751         if (engine->default_state) {
3752                 /*
3753                  * We only want to copy over the template context state;
3754                  * skipping over the headers reserved for GuC communication,
3755                  * leaving those as zero.
3756                  */
3757                 const unsigned long start = LRC_HEADER_PAGES * PAGE_SIZE;
3758                 void *defaults;
3759
3760                 defaults = i915_gem_object_pin_map(engine->default_state,
3761                                                    I915_MAP_WB);
3762                 if (IS_ERR(defaults)) {
3763                         ret = PTR_ERR(defaults);
3764                         goto err_unpin_ctx;
3765                 }
3766
3767                 memcpy(vaddr + start, defaults + start, engine->context_size);
3768                 i915_gem_object_unpin_map(engine->default_state);
3769                 inhibit = false;
3770         }
3771
3772         /* The second page of the context object contains some fields which must
3773          * be set up prior to the first execution. */
3774         regs = vaddr + LRC_STATE_PN * PAGE_SIZE;
3775         execlists_init_reg_state(regs, ce, engine, ring, inhibit);
3776         if (inhibit)
3777                 regs[CTX_CONTEXT_CONTROL] |=
3778                         _MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT);
3779
3780         ret = 0;
3781 err_unpin_ctx:
3782         __i915_gem_object_flush_map(ctx_obj,
3783                                     LRC_HEADER_PAGES * PAGE_SIZE,
3784                                     engine->context_size);
3785         i915_gem_object_unpin_map(ctx_obj);
3786         return ret;
3787 }
3788
3789 static int __execlists_context_alloc(struct intel_context *ce,
3790                                      struct intel_engine_cs *engine)
3791 {
3792         struct drm_i915_gem_object *ctx_obj;
3793         struct intel_ring *ring;
3794         struct i915_vma *vma;
3795         u32 context_size;
3796         int ret;
3797
3798         GEM_BUG_ON(ce->state);
3799         context_size = round_up(engine->context_size, I915_GTT_PAGE_SIZE);
3800
3801         /*
3802          * Before the actual start of the context image, we insert a few pages
3803          * for our own use and for sharing with the GuC.
3804          */
3805         context_size += LRC_HEADER_PAGES * PAGE_SIZE;
3806         if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
3807                 context_size += I915_GTT_PAGE_SIZE; /* for redzone */
3808
3809         ctx_obj = i915_gem_object_create_shmem(engine->i915, context_size);
3810         if (IS_ERR(ctx_obj))
3811                 return PTR_ERR(ctx_obj);
3812
3813         vma = i915_vma_instance(ctx_obj, &engine->gt->ggtt->vm, NULL);
3814         if (IS_ERR(vma)) {
3815                 ret = PTR_ERR(vma);
3816                 goto error_deref_obj;
3817         }
3818
3819         if (!ce->timeline) {
3820                 struct intel_timeline *tl;
3821
3822                 tl = intel_timeline_create(engine->gt, NULL);
3823                 if (IS_ERR(tl)) {
3824                         ret = PTR_ERR(tl);
3825                         goto error_deref_obj;
3826                 }
3827
3828                 ce->timeline = tl;
3829         }
3830
3831         ring = intel_engine_create_ring(engine, (unsigned long)ce->ring);
3832         if (IS_ERR(ring)) {
3833                 ret = PTR_ERR(ring);
3834                 goto error_deref_obj;
3835         }
3836
3837         ret = populate_lr_context(ce, ctx_obj, engine, ring);
3838         if (ret) {
3839                 DRM_DEBUG_DRIVER("Failed to populate LRC: %d\n", ret);
3840                 goto error_ring_free;
3841         }
3842
3843         ce->ring = ring;
3844         ce->state = vma;
3845
3846         return 0;
3847
3848 error_ring_free:
3849         intel_ring_put(ring);
3850 error_deref_obj:
3851         i915_gem_object_put(ctx_obj);
3852         return ret;
3853 }
3854
3855 static struct list_head *virtual_queue(struct virtual_engine *ve)
3856 {
3857         return &ve->base.execlists.default_priolist.requests[0];
3858 }
3859
3860 static void virtual_context_destroy(struct kref *kref)
3861 {
3862         struct virtual_engine *ve =
3863                 container_of(kref, typeof(*ve), context.ref);
3864         unsigned int n;
3865
3866         GEM_BUG_ON(!list_empty(virtual_queue(ve)));
3867         GEM_BUG_ON(ve->request);
3868         GEM_BUG_ON(ve->context.inflight);
3869
3870         for (n = 0; n < ve->num_siblings; n++) {
3871                 struct intel_engine_cs *sibling = ve->siblings[n];
3872                 struct rb_node *node = &ve->nodes[sibling->id].rb;
3873
3874                 if (RB_EMPTY_NODE(node))
3875                         continue;
3876
3877                 spin_lock_irq(&sibling->active.lock);
3878
3879                 /* Detachment is lazily performed in the execlists tasklet */
3880                 if (!RB_EMPTY_NODE(node))
3881                         rb_erase_cached(node, &sibling->execlists.virtual);
3882
3883                 spin_unlock_irq(&sibling->active.lock);
3884         }
3885         GEM_BUG_ON(__tasklet_is_scheduled(&ve->base.execlists.tasklet));
3886
3887         if (ve->context.state)
3888                 __execlists_context_fini(&ve->context);
3889         intel_context_fini(&ve->context);
3890
3891         kfree(ve->bonds);
3892         kfree(ve);
3893 }
3894
3895 static void virtual_engine_initial_hint(struct virtual_engine *ve)
3896 {
3897         int swp;
3898
3899         /*
3900          * Pick a random sibling on starting to help spread the load around.
3901          *
3902          * New contexts are typically created with exactly the same order
3903          * of siblings, and often started in batches. Due to the way we iterate
3904          * the array of sibling when submitting requests, sibling[0] is
3905          * prioritised for dequeuing. If we make sure that sibling[0] is fairly
3906          * randomised across the system, we also help spread the load by the
3907          * first engine we inspect being different each time.
3908          *
3909          * NB This does not force us to execute on this engine, it will just
3910          * typically be the first we inspect for submission.
3911          */
3912         swp = prandom_u32_max(ve->num_siblings);
3913         if (!swp)
3914                 return;
3915
3916         swap(ve->siblings[swp], ve->siblings[0]);
3917         if (!intel_engine_has_relative_mmio(ve->siblings[0]))
3918                 virtual_update_register_offsets(ve->context.lrc_reg_state,
3919                                                 ve->siblings[0]);
3920 }
3921
3922 static int virtual_context_pin(struct intel_context *ce)
3923 {
3924         struct virtual_engine *ve = container_of(ce, typeof(*ve), context);
3925         int err;
3926
3927         /* Note: we must use a real engine class for setting up reg state */
3928         err = __execlists_context_pin(ce, ve->siblings[0]);
3929         if (err)
3930                 return err;
3931
3932         virtual_engine_initial_hint(ve);
3933         return 0;
3934 }
3935
3936 static void virtual_context_enter(struct intel_context *ce)
3937 {
3938         struct virtual_engine *ve = container_of(ce, typeof(*ve), context);
3939         unsigned int n;
3940
3941         for (n = 0; n < ve->num_siblings; n++)
3942                 intel_engine_pm_get(ve->siblings[n]);
3943
3944         intel_timeline_enter(ce->timeline);
3945 }
3946
3947 static void virtual_context_exit(struct intel_context *ce)
3948 {
3949         struct virtual_engine *ve = container_of(ce, typeof(*ve), context);
3950         unsigned int n;
3951
3952         intel_timeline_exit(ce->timeline);
3953
3954         for (n = 0; n < ve->num_siblings; n++)
3955                 intel_engine_pm_put(ve->siblings[n]);
3956 }
3957
3958 static const struct intel_context_ops virtual_context_ops = {
3959         .pin = virtual_context_pin,
3960         .unpin = execlists_context_unpin,
3961
3962         .enter = virtual_context_enter,
3963         .exit = virtual_context_exit,
3964
3965         .destroy = virtual_context_destroy,
3966 };
3967
3968 static intel_engine_mask_t virtual_submission_mask(struct virtual_engine *ve)
3969 {
3970         struct i915_request *rq;
3971         intel_engine_mask_t mask;
3972
3973         rq = READ_ONCE(ve->request);
3974         if (!rq)
3975                 return 0;
3976
3977         /* The rq is ready for submission; rq->execution_mask is now stable. */
3978         mask = rq->execution_mask;
3979         if (unlikely(!mask)) {
3980                 /* Invalid selection, submit to a random engine in error */
3981                 i915_request_skip(rq, -ENODEV);
3982                 mask = ve->siblings[0]->mask;
3983         }
3984
3985         GEM_TRACE("%s: rq=%llx:%lld, mask=%x, prio=%d\n",
3986                   ve->base.name,
3987                   rq->fence.context, rq->fence.seqno,
3988                   mask, ve->base.execlists.queue_priority_hint);
3989
3990         return mask;
3991 }
3992
3993 static void virtual_submission_tasklet(unsigned long data)
3994 {
3995         struct virtual_engine * const ve = (struct virtual_engine *)data;
3996         const int prio = ve->base.execlists.queue_priority_hint;
3997         intel_engine_mask_t mask;
3998         unsigned int n;
3999
4000         rcu_read_lock();
4001         mask = virtual_submission_mask(ve);
4002         rcu_read_unlock();
4003         if (unlikely(!mask))
4004                 return;
4005
4006         local_irq_disable();
4007         for (n = 0; READ_ONCE(ve->request) && n < ve->num_siblings; n++) {
4008                 struct intel_engine_cs *sibling = ve->siblings[n];
4009                 struct ve_node * const node = &ve->nodes[sibling->id];
4010                 struct rb_node **parent, *rb;
4011                 bool first;
4012
4013                 if (unlikely(!(mask & sibling->mask))) {
4014                         if (!RB_EMPTY_NODE(&node->rb)) {
4015                                 spin_lock(&sibling->active.lock);
4016                                 rb_erase_cached(&node->rb,
4017                                                 &sibling->execlists.virtual);
4018                                 RB_CLEAR_NODE(&node->rb);
4019                                 spin_unlock(&sibling->active.lock);
4020                         }
4021                         continue;
4022                 }
4023
4024                 spin_lock(&sibling->active.lock);
4025
4026                 if (!RB_EMPTY_NODE(&node->rb)) {
4027                         /*
4028                          * Cheat and avoid rebalancing the tree if we can
4029                          * reuse this node in situ.
4030                          */
4031                         first = rb_first_cached(&sibling->execlists.virtual) ==
4032                                 &node->rb;
4033                         if (prio == node->prio || (prio > node->prio && first))
4034                                 goto submit_engine;
4035
4036                         rb_erase_cached(&node->rb, &sibling->execlists.virtual);
4037                 }
4038
4039                 rb = NULL;
4040                 first = true;
4041                 parent = &sibling->execlists.virtual.rb_root.rb_node;
4042                 while (*parent) {
4043                         struct ve_node *other;
4044
4045                         rb = *parent;
4046                         other = rb_entry(rb, typeof(*other), rb);
4047                         if (prio > other->prio) {
4048                                 parent = &rb->rb_left;
4049                         } else {
4050                                 parent = &rb->rb_right;
4051                                 first = false;
4052                         }
4053                 }
4054
4055                 rb_link_node(&node->rb, rb, parent);
4056                 rb_insert_color_cached(&node->rb,
4057                                        &sibling->execlists.virtual,
4058                                        first);
4059
4060 submit_engine:
4061                 GEM_BUG_ON(RB_EMPTY_NODE(&node->rb));
4062                 node->prio = prio;
4063                 if (first && prio > sibling->execlists.queue_priority_hint) {
4064                         sibling->execlists.queue_priority_hint = prio;
4065                         tasklet_hi_schedule(&sibling->execlists.tasklet);
4066                 }
4067
4068                 spin_unlock(&sibling->active.lock);
4069         }
4070         local_irq_enable();
4071 }
4072
4073 static void virtual_submit_request(struct i915_request *rq)
4074 {
4075         struct virtual_engine *ve = to_virtual_engine(rq->engine);
4076         struct i915_request *old;
4077         unsigned long flags;
4078
4079         GEM_TRACE("%s: rq=%llx:%lld\n",
4080                   ve->base.name,
4081                   rq->fence.context,
4082                   rq->fence.seqno);
4083
4084         GEM_BUG_ON(ve->base.submit_request != virtual_submit_request);
4085
4086         spin_lock_irqsave(&ve->base.active.lock, flags);
4087
4088         old = ve->request;
4089         if (old) { /* background completion event from preempt-to-busy */
4090                 GEM_BUG_ON(!i915_request_completed(old));
4091                 __i915_request_submit(old);
4092                 i915_request_put(old);
4093         }
4094
4095         if (i915_request_completed(rq)) {
4096                 __i915_request_submit(rq);
4097
4098                 ve->base.execlists.queue_priority_hint = INT_MIN;
4099                 ve->request = NULL;
4100         } else {
4101                 ve->base.execlists.queue_priority_hint = rq_prio(rq);
4102                 ve->request = i915_request_get(rq);
4103
4104                 GEM_BUG_ON(!list_empty(virtual_queue(ve)));
4105                 list_move_tail(&rq->sched.link, virtual_queue(ve));
4106
4107                 tasklet_schedule(&ve->base.execlists.tasklet);
4108         }
4109
4110         spin_unlock_irqrestore(&ve->base.active.lock, flags);
4111 }
4112
4113 static struct ve_bond *
4114 virtual_find_bond(struct virtual_engine *ve,
4115                   const struct intel_engine_cs *master)
4116 {
4117         int i;
4118
4119         for (i = 0; i < ve->num_bonds; i++) {
4120                 if (ve->bonds[i].master == master)
4121                         return &ve->bonds[i];
4122         }
4123
4124         return NULL;
4125 }
4126
4127 static void
4128 virtual_bond_execute(struct i915_request *rq, struct dma_fence *signal)
4129 {
4130         struct virtual_engine *ve = to_virtual_engine(rq->engine);
4131         intel_engine_mask_t allowed, exec;
4132         struct ve_bond *bond;
4133
4134         allowed = ~to_request(signal)->engine->mask;
4135
4136         bond = virtual_find_bond(ve, to_request(signal)->engine);
4137         if (bond)
4138                 allowed &= bond->sibling_mask;
4139
4140         /* Restrict the bonded request to run on only the available engines */
4141         exec = READ_ONCE(rq->execution_mask);
4142         while (!try_cmpxchg(&rq->execution_mask, &exec, exec & allowed))
4143                 ;
4144
4145         /* Prevent the master from being re-run on the bonded engines */
4146         to_request(signal)->execution_mask &= ~allowed;
4147 }
4148
4149 struct intel_context *
4150 intel_execlists_create_virtual(struct i915_gem_context *ctx,
4151                                struct intel_engine_cs **siblings,
4152                                unsigned int count)
4153 {
4154         struct virtual_engine *ve;
4155         unsigned int n;
4156         int err;
4157
4158         if (count == 0)
4159                 return ERR_PTR(-EINVAL);
4160
4161         if (count == 1)
4162                 return intel_context_create(ctx, siblings[0]);
4163
4164         ve = kzalloc(struct_size(ve, siblings, count), GFP_KERNEL);
4165         if (!ve)
4166                 return ERR_PTR(-ENOMEM);
4167
4168         ve->base.i915 = ctx->i915;
4169         ve->base.gt = siblings[0]->gt;
4170         ve->base.uncore = siblings[0]->uncore;
4171         ve->base.id = -1;
4172         ve->base.class = OTHER_CLASS;
4173         ve->base.uabi_class = I915_ENGINE_CLASS_INVALID;
4174         ve->base.instance = I915_ENGINE_CLASS_INVALID_VIRTUAL;
4175
4176         /*
4177          * The decision on whether to submit a request using semaphores
4178          * depends on the saturated state of the engine. We only compute
4179          * this during HW submission of the request, and we need for this
4180          * state to be globally applied to all requests being submitted
4181          * to this engine. Virtual engines encompass more than one physical
4182          * engine and so we cannot accurately tell in advance if one of those
4183          * engines is already saturated and so cannot afford to use a semaphore
4184          * and be pessimized in priority for doing so -- if we are the only
4185          * context using semaphores after all other clients have stopped, we
4186          * will be starved on the saturated system. Such a global switch for
4187          * semaphores is less than ideal, but alas is the current compromise.
4188          */
4189         ve->base.saturated = ALL_ENGINES;
4190
4191         snprintf(ve->base.name, sizeof(ve->base.name), "virtual");
4192
4193         intel_engine_init_active(&ve->base, ENGINE_VIRTUAL);
4194         intel_engine_init_breadcrumbs(&ve->base);
4195
4196         intel_engine_init_execlists(&ve->base);
4197
4198         ve->base.cops = &virtual_context_ops;
4199         ve->base.request_alloc = execlists_request_alloc;
4200
4201         ve->base.schedule = i915_schedule;
4202         ve->base.submit_request = virtual_submit_request;
4203         ve->base.bond_execute = virtual_bond_execute;
4204
4205         INIT_LIST_HEAD(virtual_queue(ve));
4206         ve->base.execlists.queue_priority_hint = INT_MIN;
4207         tasklet_init(&ve->base.execlists.tasklet,
4208                      virtual_submission_tasklet,
4209                      (unsigned long)ve);
4210
4211         intel_context_init(&ve->context, ctx, &ve->base);
4212
4213         for (n = 0; n < count; n++) {
4214                 struct intel_engine_cs *sibling = siblings[n];
4215
4216                 GEM_BUG_ON(!is_power_of_2(sibling->mask));
4217                 if (sibling->mask & ve->base.mask) {
4218                         DRM_DEBUG("duplicate %s entry in load balancer\n",
4219                                   sibling->name);
4220                         err = -EINVAL;
4221                         goto err_put;
4222                 }
4223
4224                 /*
4225                  * The virtual engine implementation is tightly coupled to
4226                  * the execlists backend -- we push out request directly
4227                  * into a tree inside each physical engine. We could support
4228                  * layering if we handle cloning of the requests and
4229                  * submitting a copy into each backend.
4230                  */
4231                 if (sibling->execlists.tasklet.func !=
4232                     execlists_submission_tasklet) {
4233                         err = -ENODEV;
4234                         goto err_put;
4235                 }
4236
4237                 GEM_BUG_ON(RB_EMPTY_NODE(&ve->nodes[sibling->id].rb));
4238                 RB_CLEAR_NODE(&ve->nodes[sibling->id].rb);
4239
4240                 ve->siblings[ve->num_siblings++] = sibling;
4241                 ve->base.mask |= sibling->mask;
4242
4243                 /*
4244                  * All physical engines must be compatible for their emission
4245                  * functions (as we build the instructions during request
4246                  * construction and do not alter them before submission
4247                  * on the physical engine). We use the engine class as a guide
4248                  * here, although that could be refined.
4249                  */
4250                 if (ve->base.class != OTHER_CLASS) {
4251                         if (ve->base.class != sibling->class) {
4252                                 DRM_DEBUG("invalid mixing of engine class, sibling %d, already %d\n",
4253                                           sibling->class, ve->base.class);
4254                                 err = -EINVAL;
4255                                 goto err_put;
4256                         }
4257                         continue;
4258                 }
4259
4260                 ve->base.class = sibling->class;
4261                 ve->base.uabi_class = sibling->uabi_class;
4262                 snprintf(ve->base.name, sizeof(ve->base.name),
4263                          "v%dx%d", ve->base.class, count);
4264                 ve->base.context_size = sibling->context_size;
4265
4266                 ve->base.emit_bb_start = sibling->emit_bb_start;
4267                 ve->base.emit_flush = sibling->emit_flush;
4268                 ve->base.emit_init_breadcrumb = sibling->emit_init_breadcrumb;
4269                 ve->base.emit_fini_breadcrumb = sibling->emit_fini_breadcrumb;
4270                 ve->base.emit_fini_breadcrumb_dw =
4271                         sibling->emit_fini_breadcrumb_dw;
4272
4273                 ve->base.flags = sibling->flags;
4274         }
4275
4276         ve->base.flags |= I915_ENGINE_IS_VIRTUAL;
4277
4278         err = __execlists_context_alloc(&ve->context, siblings[0]);
4279         if (err)
4280                 goto err_put;
4281
4282         __set_bit(CONTEXT_ALLOC_BIT, &ve->context.flags);
4283
4284         return &ve->context;
4285
4286 err_put:
4287         intel_context_put(&ve->context);
4288         return ERR_PTR(err);
4289 }
4290
4291 struct intel_context *
4292 intel_execlists_clone_virtual(struct i915_gem_context *ctx,
4293                               struct intel_engine_cs *src)
4294 {
4295         struct virtual_engine *se = to_virtual_engine(src);
4296         struct intel_context *dst;
4297
4298         dst = intel_execlists_create_virtual(ctx,
4299                                              se->siblings,
4300                                              se->num_siblings);
4301         if (IS_ERR(dst))
4302                 return dst;
4303
4304         if (se->num_bonds) {
4305                 struct virtual_engine *de = to_virtual_engine(dst->engine);
4306
4307                 de->bonds = kmemdup(se->bonds,
4308                                     sizeof(*se->bonds) * se->num_bonds,
4309                                     GFP_KERNEL);
4310                 if (!de->bonds) {
4311                         intel_context_put(dst);
4312                         return ERR_PTR(-ENOMEM);
4313                 }
4314
4315                 de->num_bonds = se->num_bonds;
4316         }
4317
4318         return dst;
4319 }
4320
4321 int intel_virtual_engine_attach_bond(struct intel_engine_cs *engine,
4322                                      const struct intel_engine_cs *master,
4323                                      const struct intel_engine_cs *sibling)
4324 {
4325         struct virtual_engine *ve = to_virtual_engine(engine);
4326         struct ve_bond *bond;
4327         int n;
4328
4329         /* Sanity check the sibling is part of the virtual engine */
4330         for (n = 0; n < ve->num_siblings; n++)
4331                 if (sibling == ve->siblings[n])
4332                         break;
4333         if (n == ve->num_siblings)
4334                 return -EINVAL;
4335
4336         bond = virtual_find_bond(ve, master);
4337         if (bond) {
4338                 bond->sibling_mask |= sibling->mask;
4339                 return 0;
4340         }
4341
4342         bond = krealloc(ve->bonds,
4343                         sizeof(*bond) * (ve->num_bonds + 1),
4344                         GFP_KERNEL);
4345         if (!bond)
4346                 return -ENOMEM;
4347
4348         bond[ve->num_bonds].master = master;
4349         bond[ve->num_bonds].sibling_mask = sibling->mask;
4350
4351         ve->bonds = bond;
4352         ve->num_bonds++;
4353
4354         return 0;
4355 }
4356
4357 struct intel_engine_cs *
4358 intel_virtual_engine_get_sibling(struct intel_engine_cs *engine,
4359                                  unsigned int sibling)
4360 {
4361         struct virtual_engine *ve = to_virtual_engine(engine);
4362
4363         if (sibling >= ve->num_siblings)
4364                 return NULL;
4365
4366         return ve->siblings[sibling];
4367 }
4368
4369 void intel_execlists_show_requests(struct intel_engine_cs *engine,
4370                                    struct drm_printer *m,
4371                                    void (*show_request)(struct drm_printer *m,
4372                                                         struct i915_request *rq,
4373                                                         const char *prefix),
4374                                    unsigned int max)
4375 {
4376         const struct intel_engine_execlists *execlists = &engine->execlists;
4377         struct i915_request *rq, *last;
4378         unsigned long flags;
4379         unsigned int count;
4380         struct rb_node *rb;
4381
4382         spin_lock_irqsave(&engine->active.lock, flags);
4383
4384         last = NULL;
4385         count = 0;
4386         list_for_each_entry(rq, &engine->active.requests, sched.link) {
4387                 if (count++ < max - 1)
4388                         show_request(m, rq, "\t\tE ");
4389                 else
4390                         last = rq;
4391         }
4392         if (last) {
4393                 if (count > max) {
4394                         drm_printf(m,
4395                                    "\t\t...skipping %d executing requests...\n",
4396                                    count - max);
4397                 }
4398                 show_request(m, last, "\t\tE ");
4399         }
4400
4401         last = NULL;
4402         count = 0;
4403         if (execlists->queue_priority_hint != INT_MIN)
4404                 drm_printf(m, "\t\tQueue priority hint: %d\n",
4405                            execlists->queue_priority_hint);
4406         for (rb = rb_first_cached(&execlists->queue); rb; rb = rb_next(rb)) {
4407                 struct i915_priolist *p = rb_entry(rb, typeof(*p), node);
4408                 int i;
4409
4410                 priolist_for_each_request(rq, p, i) {
4411                         if (count++ < max - 1)
4412                                 show_request(m, rq, "\t\tQ ");
4413                         else
4414                                 last = rq;
4415                 }
4416         }
4417         if (last) {
4418                 if (count > max) {
4419                         drm_printf(m,
4420                                    "\t\t...skipping %d queued requests...\n",
4421                                    count - max);
4422                 }
4423                 show_request(m, last, "\t\tQ ");
4424         }
4425
4426         last = NULL;
4427         count = 0;
4428         for (rb = rb_first_cached(&execlists->virtual); rb; rb = rb_next(rb)) {
4429                 struct virtual_engine *ve =
4430                         rb_entry(rb, typeof(*ve), nodes[engine->id].rb);
4431                 struct i915_request *rq = READ_ONCE(ve->request);
4432
4433                 if (rq) {
4434                         if (count++ < max - 1)
4435                                 show_request(m, rq, "\t\tV ");
4436                         else
4437                                 last = rq;
4438                 }
4439         }
4440         if (last) {
4441                 if (count > max) {
4442                         drm_printf(m,
4443                                    "\t\t...skipping %d virtual requests...\n",
4444                                    count - max);
4445                 }
4446                 show_request(m, last, "\t\tV ");
4447         }
4448
4449         spin_unlock_irqrestore(&engine->active.lock, flags);
4450 }
4451
4452 void intel_lr_context_reset(struct intel_engine_cs *engine,
4453                             struct intel_context *ce,
4454                             u32 head,
4455                             bool scrub)
4456 {
4457         GEM_BUG_ON(!intel_context_is_pinned(ce));
4458         __context_pin_acquire(ce);
4459
4460         /*
4461          * We want a simple context + ring to execute the breadcrumb update.
4462          * We cannot rely on the context being intact across the GPU hang,
4463          * so clear it and rebuild just what we need for the breadcrumb.
4464          * All pending requests for this context will be zapped, and any
4465          * future request will be after userspace has had the opportunity
4466          * to recreate its own state.
4467          */
4468         if (scrub) {
4469                 u32 *regs = ce->lrc_reg_state;
4470
4471                 if (engine->pinned_default_state) {
4472                         memcpy(regs, /* skip restoring the vanilla PPHWSP */
4473                                engine->pinned_default_state + LRC_STATE_PN * PAGE_SIZE,
4474                                engine->context_size - PAGE_SIZE);
4475                 }
4476                 execlists_init_reg_state(regs, ce, engine, ce->ring, false);
4477         }
4478
4479         /* Rerun the request; its payload has been neutered (if guilty). */
4480         ce->ring->head = head;
4481         intel_ring_update_space(ce->ring);
4482
4483         __execlists_update_reg_state(ce, engine);
4484         __context_pin_release(ce);
4485 }
4486
4487 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
4488 #include "selftest_lrc.c"
4489 #endif