Merge drm/drm-next into drm-intel-next-queued
[linux-2.6-block.git] drivers/gpu/drm/i915/intel_lrc.c
index e3a5f673ff673d2784bfc2bff7ca14c0a959578e..57396a2a6ea206d176a2e1c70cb9af95b21438bd 100644
 #include "i915_gem_render_state.h"
 #include "intel_lrc_reg.h"
 #include "intel_mocs.h"
+#include "intel_workarounds.h"
 
 #define RING_EXECLIST_QFULL            (1 << 0x2)
 #define RING_EXECLIST1_VALID           (1 << 0x3)
@@ -176,14 +177,16 @@ static inline struct i915_priolist *to_priolist(struct rb_node *rb)
 
 static inline int rq_prio(const struct i915_request *rq)
 {
-       return rq->priotree.priority;
+       return rq->sched.attr.priority;
 }
 
 static inline bool need_preempt(const struct intel_engine_cs *engine,
                                const struct i915_request *last,
                                int prio)
 {
-       return engine->i915->preempt_context && prio > max(rq_prio(last), 0);
+       return (intel_engine_has_preemption(engine) &&
+               __execlists_need_preempt(prio, rq_prio(last)) &&
+               !i915_request_completed(last));
 }
 
 /**
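
For reference, the rewritten need_preempt() above now defers its priority test to intel_engine_has_preemption() and the shared __execlists_need_preempt() helper. A minimal sketch of what that helper is assumed to reduce to (it lives in intel_ringbuffer.h, not in this file), matching the open-coded check being removed here:

static inline bool __execlists_need_preempt(int prio, int last)
{
	/* Preempt only for a strictly higher priority than the request
	 * currently executing, and never on behalf of a request whose
	 * priority sits below the default (0).
	 */
	return prio > max(last, 0);
}
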
@@ -221,7 +224,7 @@ static void
 intel_lr_context_descriptor_update(struct i915_gem_context *ctx,
                                   struct intel_engine_cs *engine)
 {
-       struct intel_context *ce = &ctx->engine[engine->id];
+       struct intel_context *ce = to_intel_context(ctx, engine);
        u64 desc;
 
        BUILD_BUG_ON(MAX_CONTEXT_HW_ID > (BIT(GEN8_CTX_ID_WIDTH)));
@@ -256,7 +259,7 @@ intel_lr_context_descriptor_update(struct i915_gem_context *ctx,
 
 static struct i915_priolist *
 lookup_priolist(struct intel_engine_cs *engine,
-               struct i915_priotree *pt,
+               struct i915_sched_node *node,
                int prio)
 {
        struct intel_engine_execlists * const execlists = &engine->execlists;
@@ -342,10 +345,10 @@ static void __unwind_incomplete_requests(struct intel_engine_cs *engine)
                GEM_BUG_ON(rq_prio(rq) == I915_PRIORITY_INVALID);
                if (rq_prio(rq) != last_prio) {
                        last_prio = rq_prio(rq);
-                       p = lookup_priolist(engine, &rq->priotree, last_prio);
+                       p = lookup_priolist(engine, &rq->sched, last_prio);
                }
 
-               list_add(&rq->priotree.link, &p->requests);
+               list_add(&rq->sched.link, &p->requests);
        }
 }
 
@@ -374,6 +377,19 @@ execlists_context_status_change(struct i915_request *rq, unsigned long status)
                                   status, rq);
 }
 
+inline void
+execlists_user_begin(struct intel_engine_execlists *execlists,
+                    const struct execlist_port *port)
+{
+       execlists_set_active_once(execlists, EXECLISTS_ACTIVE_USER);
+}
+
+inline void
+execlists_user_end(struct intel_engine_execlists *execlists)
+{
+       execlists_clear_active(execlists, EXECLISTS_ACTIVE_USER);
+}
+
 static inline void
 execlists_context_schedule_in(struct i915_request *rq)
 {
@@ -399,7 +415,7 @@ execlists_update_context_pdps(struct i915_hw_ppgtt *ppgtt, u32 *reg_state)
 
 static u64 execlists_update_context(struct i915_request *rq)
 {
-       struct intel_context *ce = &rq->ctx->engine[rq->engine->id];
+       struct intel_context *ce = to_intel_context(rq->ctx, rq->engine);
        struct i915_hw_ppgtt *ppgtt =
                rq->ctx->ppgtt ?: rq->i915->mm.aliasing_ppgtt;
        u32 *reg_state = ce->lrc_reg_state;
@@ -454,10 +470,12 @@ static void execlists_submit_ports(struct intel_engine_cs *engine)
                        desc = execlists_update_context(rq);
                        GEM_DEBUG_EXEC(port[n].context_id = upper_32_bits(desc));
 
-                       GEM_TRACE("%s in[%d]:  ctx=%d.%d, seqno=%x, prio=%d\n",
+                       GEM_TRACE("%s in[%d]:  ctx=%d.%d, global=%d (fence %llx:%d) (current %d), prio=%d\n",
                                  engine->name, n,
                                  port[n].context_id, count,
                                  rq->global_seqno,
+                                 rq->fence.context, rq->fence.seqno,
+                                 intel_engine_get_seqno(engine),
                                  rq_prio(rq));
                } else {
                        GEM_BUG_ON(!n);
@@ -506,7 +524,7 @@ static void inject_preempt_context(struct intel_engine_cs *engine)
 {
        struct intel_engine_execlists *execlists = &engine->execlists;
        struct intel_context *ce =
-               &engine->i915->preempt_context->engine[engine->id];
+               to_intel_context(engine->i915->preempt_context, engine);
        unsigned int n;
 
        GEM_BUG_ON(execlists->preempt_complete_status !=
@@ -637,7 +655,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
                struct i915_priolist *p = to_priolist(rb);
                struct i915_request *rq, *rn;
 
-               list_for_each_entry_safe(rq, rn, &p->requests, priotree.link) {
+               list_for_each_entry_safe(rq, rn, &p->requests, sched.link) {
                        /*
                         * Can we combine this request with the current port?
                         * It has to be the same context/ringbuffer and not
@@ -657,7 +675,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
                                 */
                                if (port == last_port) {
                                        __list_del_many(&p->requests,
-                                                       &rq->priotree.link);
+                                                       &rq->sched.link);
                                        goto done;
                                }
 
@@ -671,7 +689,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
                                if (ctx_single_port_submission(last->ctx) ||
                                    ctx_single_port_submission(rq->ctx)) {
                                        __list_del_many(&p->requests,
-                                                       &rq->priotree.link);
+                                                       &rq->sched.link);
                                        goto done;
                                }
 
@@ -684,7 +702,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
                                GEM_BUG_ON(port_isset(port));
                        }
 
-                       INIT_LIST_HEAD(&rq->priotree.link);
+                       INIT_LIST_HEAD(&rq->sched.link);
                        __i915_request_submit(rq);
                        trace_i915_request_in(rq, port_index(port, execlists));
                        last = rq;
@@ -697,8 +715,27 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
                if (p->priority != I915_PRIORITY_NORMAL)
                        kmem_cache_free(engine->i915->priorities, p);
        }
+
 done:
-       execlists->queue_priority = rb ? to_priolist(rb)->priority : INT_MIN;
+       /*
+        * Here be a bit of magic! Or sleight-of-hand, whichever you prefer.
+        *
+        * We choose queue_priority such that if we add a request of greater
+        * priority than this, we kick the submission tasklet to decide on
+        * the right order of submitting the requests to hardware. We must
+        * also be prepared to reorder requests as they are in-flight on the
+        * HW. We derive the queue_priority then as the first "hole" in
+        * the HW submission ports and if there are no available slots,
+        * the priority of the lowest executing request, i.e. last.
+        *
+        * When we do receive a higher priority request ready to run from the
+        * user, see queue_request(), the queue_priority is bumped to that
+        * request triggering preemption on the next dequeue (or subsequent
+        * interrupt for secondary ports).
+        */
+       execlists->queue_priority =
+               port != execlists->port ? rq_prio(last) : INT_MIN;
+
        execlists->first = rb;
        if (submit)
                port_assign(port, last);
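
A brief restatement of the comment above as code, mirroring the submit_queue()/__submit_queue() pair added further down in this diff (the helper name is hypothetical, for illustration only): with both ELSP ports occupied, queue_priority is rq_prio(last), so only a strictly higher-priority request kicks the tasklet; with a port still free it is INT_MIN, so any newly runnable request forces a dequeue.

static void kick_submission(struct intel_engine_cs *engine, int prio)
{
	/* Illustrative sketch: bump queue_priority and reschedule the
	 * submission tasklet only when the new request outranks whatever
	 * the ports already hold (INT_MIN while a port is still free).
	 */
	if (prio > engine->execlists.queue_priority) {
		engine->execlists.queue_priority = prio;
		tasklet_hi_schedule(&engine->execlists.tasklet);
	}
}
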
@@ -710,7 +747,7 @@ unlock:
        spin_unlock_irq(&engine->timeline->lock);
 
        if (submit) {
-               execlists_set_active(execlists, EXECLISTS_ACTIVE_USER);
+               execlists_user_begin(execlists, execlists->port);
                execlists_submit_ports(engine);
        }
 
@@ -727,6 +764,13 @@ execlists_cancel_port_requests(struct intel_engine_execlists * const execlists)
        while (num_ports-- && port_isset(port)) {
                struct i915_request *rq = port_request(port);
 
+               GEM_TRACE("%s:port%u global=%d (fence %llx:%d), (current %d)\n",
+                         rq->engine->name,
+                         (unsigned int)(port - execlists->port),
+                         rq->global_seqno,
+                         rq->fence.context, rq->fence.seqno,
+                         intel_engine_get_seqno(rq->engine));
+
                GEM_BUG_ON(!execlists->active);
                intel_engine_context_out(rq->engine);
 
@@ -742,6 +786,82 @@ execlists_cancel_port_requests(struct intel_engine_execlists * const execlists)
        }
 
        execlists_clear_active(execlists, EXECLISTS_ACTIVE_USER);
+       execlists_user_end(execlists);
+}
+
+static void clear_gtiir(struct intel_engine_cs *engine)
+{
+       struct drm_i915_private *dev_priv = engine->i915;
+       int i;
+
+       /*
+        * Clear any pending interrupt state.
+        *
+        * We do it twice out of paranoia that some of the IIR are
+        * double buffered, and so if we only reset it once there may
+        * still be an interrupt pending.
+        */
+       if (INTEL_GEN(dev_priv) >= 11) {
+               static const struct {
+                       u8 bank;
+                       u8 bit;
+               } gen11_gtiir[] = {
+                       [RCS] = {0, GEN11_RCS0},
+                       [BCS] = {0, GEN11_BCS},
+                       [_VCS(0)] = {1, GEN11_VCS(0)},
+                       [_VCS(1)] = {1, GEN11_VCS(1)},
+                       [_VCS(2)] = {1, GEN11_VCS(2)},
+                       [_VCS(3)] = {1, GEN11_VCS(3)},
+                       [_VECS(0)] = {1, GEN11_VECS(0)},
+                       [_VECS(1)] = {1, GEN11_VECS(1)},
+               };
+               unsigned long irqflags;
+
+               GEM_BUG_ON(engine->id >= ARRAY_SIZE(gen11_gtiir));
+
+               spin_lock_irqsave(&dev_priv->irq_lock, irqflags);
+               for (i = 0; i < 2; i++) {
+                       gen11_reset_one_iir(dev_priv,
+                                           gen11_gtiir[engine->id].bank,
+                                           gen11_gtiir[engine->id].bit);
+               }
+               spin_unlock_irqrestore(&dev_priv->irq_lock, irqflags);
+       } else {
+               static const u8 gtiir[] = {
+                       [RCS]  = 0,
+                       [BCS]  = 0,
+                       [VCS]  = 1,
+                       [VCS2] = 1,
+                       [VECS] = 3,
+               };
+
+               GEM_BUG_ON(engine->id >= ARRAY_SIZE(gtiir));
+
+               for (i = 0; i < 2; i++) {
+                       I915_WRITE(GEN8_GT_IIR(gtiir[engine->id]),
+                                  engine->irq_keep_mask);
+                       POSTING_READ(GEN8_GT_IIR(gtiir[engine->id]));
+               }
+               GEM_BUG_ON(I915_READ(GEN8_GT_IIR(gtiir[engine->id])) &
+                          engine->irq_keep_mask);
+       }
+}
+
+static void reset_irq(struct intel_engine_cs *engine)
+{
+       /* Mark all CS interrupts as complete */
+       smp_store_mb(engine->execlists.active, 0);
+       synchronize_hardirq(engine->i915->drm.irq);
+
+       clear_gtiir(engine);
+
+       /*
+        * The port is checked prior to scheduling a tasklet, but
+        * just in case we have suspended the tasklet to do the
+        * wedging make sure that when it wakes, it decides there
+        * is no work to do by clearing the irq_posted bit.
+        */
+       clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted);
 }
 
 static void execlists_cancel_requests(struct intel_engine_cs *engine)
@@ -751,7 +871,8 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine)
        struct rb_node *rb;
        unsigned long flags;
 
-       GEM_TRACE("%s\n", engine->name);
+       GEM_TRACE("%s current %d\n",
+                 engine->name, intel_engine_get_seqno(engine));
 
        /*
         * Before we call engine->cancel_requests(), we should have exclusive
@@ -771,6 +892,7 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine)
 
        /* Cancel the requests on the HW and clear the ELSP tracker. */
        execlists_cancel_port_requests(execlists);
+       reset_irq(engine);
 
        spin_lock(&engine->timeline->lock);
 
@@ -786,8 +908,8 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine)
        while (rb) {
                struct i915_priolist *p = to_priolist(rb);
 
-               list_for_each_entry_safe(rq, rn, &p->requests, priotree.link) {
-                       INIT_LIST_HEAD(&rq->priotree.link);
+               list_for_each_entry_safe(rq, rn, &p->requests, sched.link) {
+                       INIT_LIST_HEAD(&rq->sched.link);
 
                        dma_fence_set_error(&rq->fence, -EIO);
                        __i915_request_submit(rq);
@@ -809,17 +931,6 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine)
 
        spin_unlock(&engine->timeline->lock);
 
-       /*
-        * The port is checked prior to scheduling a tasklet, but
-        * just in case we have suspended the tasklet to do the
-        * wedging make sure that when it wakes, it decides there
-        * is no work to do by clearing the irq_posted bit.
-        */
-       clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted);
-
-       /* Mark all CS interrupts as complete */
-       execlists->active = 0;
-
        local_irq_restore(flags);
 }
 
@@ -831,7 +942,7 @@ static void execlists_submission_tasklet(unsigned long data)
 {
        struct intel_engine_cs * const engine = (struct intel_engine_cs *)data;
        struct intel_engine_execlists * const execlists = &engine->execlists;
-       struct execlist_port * const port = execlists->port;
+       struct execlist_port *port = execlists->port;
        struct drm_i915_private *dev_priv = engine->i915;
        bool fw = false;
 
@@ -958,10 +1069,13 @@ static void execlists_submission_tasklet(unsigned long data)
                                                        EXECLISTS_ACTIVE_USER));
 
                        rq = port_unpack(port, &count);
-                       GEM_TRACE("%s out[0]: ctx=%d.%d, seqno=%x, prio=%d\n",
+                       GEM_TRACE("%s out[0]: ctx=%d.%d, global=%d (fence %llx:%d) (current %d), prio=%d\n",
                                  engine->name,
                                  port->context_id, count,
                                  rq ? rq->global_seqno : 0,
+                                 rq ? rq->fence.context : 0,
+                                 rq ? rq->fence.seqno : 0,
+                                 intel_engine_get_seqno(engine),
                                  rq ? rq_prio(rq) : 0);
 
                        /* Check the context/desc id for this event matches */
@@ -969,10 +1083,28 @@ static void execlists_submission_tasklet(unsigned long data)
 
                        GEM_BUG_ON(count == 0);
                        if (--count == 0) {
+                               /*
+                                * On the final event corresponding to the
+                                * submission of this context, we expect either
+                                * an element-switch event or a completion
+                                * event (and on completion, the active-idle
+                                * marker). No more preemptions, lite-restore
+                                * or otherwise.
+                                */
                                GEM_BUG_ON(status & GEN8_CTX_STATUS_PREEMPTED);
                                GEM_BUG_ON(port_isset(&port[1]) &&
                                           !(status & GEN8_CTX_STATUS_ELEMENT_SWITCH));
+                               GEM_BUG_ON(!port_isset(&port[1]) &&
+                                          !(status & GEN8_CTX_STATUS_ACTIVE_IDLE));
+
+                               /*
+                                * We rely on the hardware being strongly
+                                * ordered, that the breadcrumb write is
+                                * coherent (visible from the CPU) before the
+                                * user interrupt and CSB is processed.
+                                */
                                GEM_BUG_ON(!i915_request_completed(rq));
+
                                execlists_context_schedule_out(rq);
                                trace_i915_request_out(rq);
                                i915_request_put(rq);
@@ -980,17 +1112,14 @@ static void execlists_submission_tasklet(unsigned long data)
                                GEM_TRACE("%s completed ctx=%d\n",
                                          engine->name, port->context_id);
 
-                               execlists_port_complete(execlists, port);
+                               port = execlists_port_complete(execlists, port);
+                               if (port_isset(port))
+                                       execlists_user_begin(execlists, port);
+                               else
+                                       execlists_user_end(execlists);
                        } else {
                                port_set(port, port_pack(rq, count));
                        }
-
-                       /* After the final element, the hw should be idle */
-                       GEM_BUG_ON(port_count(port) == 0 &&
-                                  !(status & GEN8_CTX_STATUS_ACTIVE_IDLE));
-                       if (port_count(port) == 0)
-                               execlists_clear_active(execlists,
-                                                      EXECLISTS_ACTIVE_USER);
                }
 
                if (head != execlists->csb_head) {
@@ -1013,18 +1142,23 @@ static void execlists_submission_tasklet(unsigned long data)
 }
 
 static void queue_request(struct intel_engine_cs *engine,
-                         struct i915_priotree *pt,
+                         struct i915_sched_node *node,
                          int prio)
 {
-       list_add_tail(&pt->link, &lookup_priolist(engine, pt, prio)->requests);
+       list_add_tail(&node->link,
+                     &lookup_priolist(engine, node, prio)->requests);
+}
+
+static void __submit_queue(struct intel_engine_cs *engine, int prio)
+{
+       engine->execlists.queue_priority = prio;
+       tasklet_hi_schedule(&engine->execlists.tasklet);
 }
 
 static void submit_queue(struct intel_engine_cs *engine, int prio)
 {
-       if (prio > engine->execlists.queue_priority) {
-               engine->execlists.queue_priority = prio;
-               tasklet_hi_schedule(&engine->execlists.tasklet);
-       }
+       if (prio > engine->execlists.queue_priority)
+               __submit_queue(engine, prio);
 }
 
 static void execlists_submit_request(struct i915_request *request)
@@ -1035,24 +1169,24 @@ static void execlists_submit_request(struct i915_request *request)
        /* Will be called from irq-context when using foreign fences. */
        spin_lock_irqsave(&engine->timeline->lock, flags);
 
-       queue_request(engine, &request->priotree, rq_prio(request));
+       queue_request(engine, &request->sched, rq_prio(request));
        submit_queue(engine, rq_prio(request));
 
        GEM_BUG_ON(!engine->execlists.first);
-       GEM_BUG_ON(list_empty(&request->priotree.link));
+       GEM_BUG_ON(list_empty(&request->sched.link));
 
        spin_unlock_irqrestore(&engine->timeline->lock, flags);
 }
 
-static struct i915_request *pt_to_request(struct i915_priotree *pt)
+static struct i915_request *sched_to_request(struct i915_sched_node *node)
 {
-       return container_of(pt, struct i915_request, priotree);
+       return container_of(node, struct i915_request, sched);
 }
 
 static struct intel_engine_cs *
-pt_lock_engine(struct i915_priotree *pt, struct intel_engine_cs *locked)
+sched_lock_engine(struct i915_sched_node *node, struct intel_engine_cs *locked)
 {
-       struct intel_engine_cs *engine = pt_to_request(pt)->engine;
+       struct intel_engine_cs *engine = sched_to_request(node)->engine;
 
        GEM_BUG_ON(!locked);
 
@@ -1064,11 +1198,13 @@ pt_lock_engine(struct i915_priotree *pt, struct intel_engine_cs *locked)
        return engine;
 }
 
-static void execlists_schedule(struct i915_request *request, int prio)
+static void execlists_schedule(struct i915_request *request,
+                              const struct i915_sched_attr *attr)
 {
        struct intel_engine_cs *engine;
        struct i915_dependency *dep, *p;
        struct i915_dependency stack;
+       const int prio = attr->priority;
        LIST_HEAD(dfs);
 
        GEM_BUG_ON(prio == I915_PRIORITY_INVALID);
@@ -1076,23 +1212,23 @@ static void execlists_schedule(struct i915_request *request, int prio)
        if (i915_request_completed(request))
                return;
 
-       if (prio <= READ_ONCE(request->priotree.priority))
+       if (prio <= READ_ONCE(request->sched.attr.priority))
                return;
 
        /* Need BKL in order to use the temporary link inside i915_dependency */
        lockdep_assert_held(&request->i915->drm.struct_mutex);
 
-       stack.signaler = &request->priotree;
+       stack.signaler = &request->sched;
        list_add(&stack.dfs_link, &dfs);
 
        /*
         * Recursively bump all dependent priorities to match the new request.
         *
         * A naive approach would be to use recursion:
-        * static void update_priorities(struct i915_priotree *pt, prio) {
-        *      list_for_each_entry(dep, &pt->signalers_list, signal_link)
+        * static void update_priorities(struct i915_sched_node *node, prio) {
+        *      list_for_each_entry(dep, &node->signalers_list, signal_link)
         *              update_priorities(dep->signal, prio)
-        *      queue_request(pt);
+        *      queue_request(node);
         * }
         * but that may have unlimited recursion depth and so runs a very
         * real risk of overrunning the kernel stack. Instead, we build
@@ -1104,7 +1240,7 @@ static void execlists_schedule(struct i915_request *request, int prio)
         * last element in the list is the request we must execute first.
         */
        list_for_each_entry(dep, &dfs, dfs_link) {
-               struct i915_priotree *pt = dep->signaler;
+               struct i915_sched_node *node = dep->signaler;
 
                /*
                 * Within an engine, there can be no cycle, but we may
@@ -1112,14 +1248,14 @@ static void execlists_schedule(struct i915_request *request, int prio)
                 * (redundant dependencies are not eliminated) and across
                 * engines.
                 */
-               list_for_each_entry(p, &pt->signalers_list, signal_link) {
+               list_for_each_entry(p, &node->signalers_list, signal_link) {
                        GEM_BUG_ON(p == dep); /* no cycles! */
 
-                       if (i915_priotree_signaled(p->signaler))
+                       if (i915_sched_node_signaled(p->signaler))
                                continue;
 
-                       GEM_BUG_ON(p->signaler->priority < pt->priority);
-                       if (prio > READ_ONCE(p->signaler->priority))
+                       GEM_BUG_ON(p->signaler->attr.priority < node->attr.priority);
+                       if (prio > READ_ONCE(p->signaler->attr.priority))
                                list_move_tail(&p->dfs_link, &dfs);
                }
        }
@@ -1130,9 +1266,9 @@ static void execlists_schedule(struct i915_request *request, int prio)
         * execlists_submit_request()), we can set our own priority and skip
         * acquiring the engine locks.
         */
-       if (request->priotree.priority == I915_PRIORITY_INVALID) {
-               GEM_BUG_ON(!list_empty(&request->priotree.link));
-               request->priotree.priority = prio;
+       if (request->sched.attr.priority == I915_PRIORITY_INVALID) {
+               GEM_BUG_ON(!list_empty(&request->sched.link));
+               request->sched.attr = *attr;
                if (stack.dfs_link.next == stack.dfs_link.prev)
                        return;
                __list_del_entry(&stack.dfs_link);
@@ -1143,21 +1279,24 @@ static void execlists_schedule(struct i915_request *request, int prio)
 
        /* Fifo and depth-first replacement ensure our deps execute before us */
        list_for_each_entry_safe_reverse(dep, p, &dfs, dfs_link) {
-               struct i915_priotree *pt = dep->signaler;
+               struct i915_sched_node *node = dep->signaler;
 
                INIT_LIST_HEAD(&dep->dfs_link);
 
-               engine = pt_lock_engine(pt, engine);
+               engine = sched_lock_engine(node, engine);
 
-               if (prio <= pt->priority)
+               if (prio <= node->attr.priority)
                        continue;
 
-               pt->priority = prio;
-               if (!list_empty(&pt->link)) {
-                       __list_del_entry(&pt->link);
-                       queue_request(engine, pt, prio);
+               node->attr.priority = prio;
+               if (!list_empty(&node->link)) {
+                       __list_del_entry(&node->link);
+                       queue_request(engine, node, prio);
                }
-               submit_queue(engine, prio);
+
+               if (prio > engine->execlists.queue_priority &&
+                   i915_sw_fence_done(&sched_to_request(node)->submit))
+                       __submit_queue(engine, prio);
        }
 
        spin_unlock_irq(&engine->timeline->lock);
@@ -1190,7 +1329,7 @@ static struct intel_ring *
 execlists_context_pin(struct intel_engine_cs *engine,
                      struct i915_gem_context *ctx)
 {
-       struct intel_context *ce = &ctx->engine[engine->id];
+       struct intel_context *ce = to_intel_context(ctx, engine);
        void *vaddr;
        int ret;
 
@@ -1224,6 +1363,7 @@ execlists_context_pin(struct intel_engine_cs *engine,
        ce->lrc_reg_state = vaddr + LRC_STATE_PN * PAGE_SIZE;
        ce->lrc_reg_state[CTX_RING_BUFFER_START+1] =
                i915_ggtt_offset(ce->ring->vma);
+       ce->lrc_reg_state[CTX_RING_HEAD+1] = ce->ring->head;
 
        ce->state->obj->pin_global++;
        i915_gem_context_get(ctx);
@@ -1242,7 +1382,7 @@ err:
 static void execlists_context_unpin(struct intel_engine_cs *engine,
                                    struct i915_gem_context *ctx)
 {
-       struct intel_context *ce = &ctx->engine[engine->id];
+       struct intel_context *ce = to_intel_context(ctx, engine);
 
        lockdep_assert_held(&ctx->i915->drm.struct_mutex);
        GEM_BUG_ON(ce->pin_count == 0);
@@ -1261,8 +1401,8 @@ static void execlists_context_unpin(struct intel_engine_cs *engine,
 
 static int execlists_request_alloc(struct i915_request *request)
 {
-       struct intel_engine_cs *engine = request->engine;
-       struct intel_context *ce = &request->ctx->engine[engine->id];
+       struct intel_context *ce =
+               to_intel_context(request->ctx, request->engine);
        int ret;
 
        GEM_BUG_ON(!ce->pin_count);
@@ -1574,14 +1714,6 @@ static int intel_init_workaround_bb(struct intel_engine_cs *engine)
        return ret;
 }
 
-static u8 gtiir[] = {
-       [RCS] = 0,
-       [BCS] = 0,
-       [VCS] = 1,
-       [VCS2] = 1,
-       [VECS] = 3,
-};
-
 static void enable_execlists(struct intel_engine_cs *engine)
 {
        struct drm_i915_private *dev_priv = engine->i915;
@@ -1641,6 +1773,8 @@ static int gen8_init_render_ring(struct intel_engine_cs *engine)
        if (ret)
                return ret;
 
+       intel_whitelist_workarounds_apply(engine);
+
        /* We need to disable the AsyncFlip performance optimisations in order
         * to use MI_WAIT_FOR_EVENT within the CS. It should already be
         * programmed to '1' on all products.
@@ -1651,7 +1785,7 @@ static int gen8_init_render_ring(struct intel_engine_cs *engine)
 
        I915_WRITE(INSTPM, _MASKED_BIT_ENABLE(INSTPM_FORCE_ORDERING));
 
-       return init_workarounds_ring(engine);
+       return 0;
 }
 
 static int gen9_init_render_ring(struct intel_engine_cs *engine)
@@ -1662,49 +1796,25 @@ static int gen9_init_render_ring(struct intel_engine_cs *engine)
        if (ret)
                return ret;
 
-       return init_workarounds_ring(engine);
-}
-
-static void reset_irq(struct intel_engine_cs *engine)
-{
-       struct drm_i915_private *dev_priv = engine->i915;
-       int i;
-
-       GEM_BUG_ON(engine->id >= ARRAY_SIZE(gtiir));
-
-       /*
-        * Clear any pending interrupt state.
-        *
-        * We do it twice out of paranoia that some of the IIR are double
-        * buffered, and if we only reset it once there may still be
-        * an interrupt pending.
-        */
-       for (i = 0; i < 2; i++) {
-               I915_WRITE(GEN8_GT_IIR(gtiir[engine->id]),
-                          GT_CONTEXT_SWITCH_INTERRUPT << engine->irq_shift);
-               POSTING_READ(GEN8_GT_IIR(gtiir[engine->id]));
-       }
-       GEM_BUG_ON(I915_READ(GEN8_GT_IIR(gtiir[engine->id])) &
-                  (GT_CONTEXT_SWITCH_INTERRUPT << engine->irq_shift));
+       intel_whitelist_workarounds_apply(engine);
 
-       clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted);
+       return 0;
 }
 
 static void reset_common_ring(struct intel_engine_cs *engine,
                              struct i915_request *request)
 {
        struct intel_engine_execlists * const execlists = &engine->execlists;
-       struct intel_context *ce;
        unsigned long flags;
+       u32 *regs;
 
-       GEM_TRACE("%s seqno=%x\n",
-                 engine->name, request ? request->global_seqno : 0);
+       GEM_TRACE("%s request global=%x, current=%d\n",
+                 engine->name, request ? request->global_seqno : 0,
+                 intel_engine_get_seqno(engine));
 
        /* See execlists_cancel_requests() for the irq/spinlock split. */
        local_irq_save(flags);
 
-       reset_irq(engine);
-
        /*
         * Catch up with any missed context-switch interrupts.
         *
@@ -1715,15 +1825,13 @@ static void reset_common_ring(struct intel_engine_cs *engine,
         * requests were completed.
         */
        execlists_cancel_port_requests(execlists);
+       reset_irq(engine);
 
        /* Push back any incomplete requests for replay after the reset. */
        spin_lock(&engine->timeline->lock);
        __unwind_incomplete_requests(engine);
        spin_unlock(&engine->timeline->lock);
 
-       /* Mark all CS interrupts as complete */
-       execlists->active = 0;
-
        local_irq_restore(flags);
 
        /*
@@ -1748,14 +1856,24 @@ static void reset_common_ring(struct intel_engine_cs *engine,
         * future request will be after userspace has had the opportunity
         * to recreate its own state.
         */
-       ce = &request->ctx->engine[engine->id];
-       execlists_init_reg_state(ce->lrc_reg_state,
-                                request->ctx, engine, ce->ring);
+       regs = to_intel_context(request->ctx, engine)->lrc_reg_state;
+       if (engine->default_state) {
+               void *defaults;
+
+               defaults = i915_gem_object_pin_map(engine->default_state,
+                                                  I915_MAP_WB);
+               if (!IS_ERR(defaults)) {
+                       memcpy(regs, /* skip restoring the vanilla PPHWSP */
+                              defaults + LRC_STATE_PN * PAGE_SIZE,
+                              engine->context_size - PAGE_SIZE);
+                       i915_gem_object_unpin_map(engine->default_state);
+               }
+       }
+       execlists_init_reg_state(regs, request->ctx, engine, request->ring);
 
        /* Move the RING_HEAD onto the breadcrumb, past the hanging batch */
-       ce->lrc_reg_state[CTX_RING_BUFFER_START+1] =
-               i915_ggtt_offset(ce->ring->vma);
-       ce->lrc_reg_state[CTX_RING_HEAD+1] = request->postfix;
+       regs[CTX_RING_BUFFER_START + 1] = i915_ggtt_offset(request->ring->vma);
+       regs[CTX_RING_HEAD + 1] = request->postfix;
 
        request->ring->head = request->postfix;
        intel_ring_update_space(request->ring);
@@ -2015,7 +2133,7 @@ static int gen8_init_rcs_context(struct i915_request *rq)
 {
        int ret;
 
-       ret = intel_ring_workarounds_emit(rq);
+       ret = intel_ctx_workarounds_emit(rq);
        if (ret)
                return ret;
 
@@ -2075,11 +2193,13 @@ static void execlists_set_default_submission(struct intel_engine_cs *engine)
        engine->unpark = NULL;
 
        engine->flags |= I915_ENGINE_SUPPORTS_STATS;
+       if (engine->i915->preempt_context)
+               engine->flags |= I915_ENGINE_HAS_PREEMPTION;
 
        engine->i915->caps.scheduler =
                I915_SCHEDULER_CAP_ENABLED |
                I915_SCHEDULER_CAP_PRIORITY;
-       if (engine->i915->preempt_context)
+       if (intel_engine_has_preemption(engine))
                engine->i915->caps.scheduler |= I915_SCHEDULER_CAP_PREEMPTION;
 }
 
@@ -2118,7 +2238,20 @@ logical_ring_default_vfuncs(struct intel_engine_cs *engine)
 static inline void
 logical_ring_default_irqs(struct intel_engine_cs *engine)
 {
-       unsigned shift = engine->irq_shift;
+       unsigned int shift = 0;
+
+       if (INTEL_GEN(engine->i915) < 11) {
+               const u8 irq_shifts[] = {
+                       [RCS]  = GEN8_RCS_IRQ_SHIFT,
+                       [BCS]  = GEN8_BCS_IRQ_SHIFT,
+                       [VCS]  = GEN8_VCS1_IRQ_SHIFT,
+                       [VCS2] = GEN8_VCS2_IRQ_SHIFT,
+                       [VECS] = GEN8_VECS_IRQ_SHIFT,
+               };
+
+               shift = irq_shifts[engine->id];
+       }
+
        engine->irq_enable_mask = GT_RENDER_USER_INTERRUPT << shift;
        engine->irq_keep_mask = GT_CONTEXT_SWITCH_INTERRUPT << shift;
 }
@@ -2174,9 +2307,13 @@ static int logical_ring_init(struct intel_engine_cs *engine)
        }
 
        engine->execlists.preempt_complete_status = ~0u;
-       if (engine->i915->preempt_context)
+       if (engine->i915->preempt_context) {
+               struct intel_context *ce =
+                       to_intel_context(engine->i915->preempt_context, engine);
+
                engine->execlists.preempt_complete_status =
-                       upper_32_bits(engine->i915->preempt_context->engine[engine->id].lrc_desc);
+                       upper_32_bits(ce->lrc_desc);
+       }
 
        return 0;
 
@@ -2458,7 +2595,7 @@ static int execlists_context_deferred_alloc(struct i915_gem_context *ctx,
                                            struct intel_engine_cs *engine)
 {
        struct drm_i915_gem_object *ctx_obj;
-       struct intel_context *ce = &ctx->engine[engine->id];
+       struct intel_context *ce = to_intel_context(ctx, engine);
        struct i915_vma *vma;
        uint32_t context_size;
        struct intel_ring *ring;
@@ -2529,7 +2666,8 @@ void intel_lr_context_resume(struct drm_i915_private *dev_priv)
         */
        list_for_each_entry(ctx, &dev_priv->contexts.list, link) {
                for_each_engine(engine, dev_priv, id) {
-                       struct intel_context *ce = &ctx->engine[engine->id];
+                       struct intel_context *ce =
+                               to_intel_context(ctx, engine);
                        u32 *reg;
 
                        if (!ce->state)
@@ -2551,3 +2689,7 @@ void intel_lr_context_resume(struct drm_i915_private *dev_priv)
                }
        }
 }
+
+#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
+#include "selftests/intel_lrc.c"
+#endif