drm/i915/execlists: Don't merely skip submission if maybe timeslicing
authorChris Wilson <chris@chris-wilson.co.uk>
Fri, 18 Oct 2019 07:20:27 +0000 (08:20 +0100)
committerChris Wilson <chris@chris-wilson.co.uk>
Fri, 18 Oct 2019 10:23:26 +0000 (11:23 +0100)
Normally, we try and skip submission if ELSP[1] is filled. However, we
may desire to enable timeslicing due to the queue priority, even if
ELSP[1] itself does not require timeslicing. That is the queue is equal
priority to ELSP[0] and higher priority then ELSP[1]. Previously, we
would wait until the context switch to preempt the current ELSP[1], but
with timeslicing, we want to preempt ELSP[0] and replace it with the
queue.

In writing the test case, it become quickly apparent that we were also
suppressing the tasklet during promotion and so failing to notice when
the queue started requiring timeslicing.

Fixes: 2229adc81380 ("drm/i915/execlist: Trim immediate timeslice expiry")
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20191018072027.31948-1-chris@chris-wilson.co.uk
drivers/gpu/drm/i915/gt/intel_lrc.c
drivers/gpu/drm/i915/gt/selftest_lrc.c
drivers/gpu/drm/i915/i915_scheduler.c
drivers/gpu/drm/i915/i915_scheduler.h

index e9fe9f79cedd60f6a82896f4d2ff860678978217..d0088d0202202d3cc201dc6d30535adc05b4b233 100644 (file)
@@ -352,10 +352,15 @@ static inline bool need_preempt(const struct intel_engine_cs *engine,
         * However, the priority hint is a mere hint that we may need to
         * preempt. If that hint is stale or we may be trying to preempt
         * ourselves, ignore the request.
+        *
+        * More naturally we would write
+        *      prio >= max(0, last);
+        * except that we wish to prevent triggering preemption at the same
+        * priority level: the task that is running should remain running
+        * to preserve FIFO ordering of dependencies.
         */
-       last_prio = effective_prio(rq);
-       if (!i915_scheduler_need_preempt(engine->execlists.queue_priority_hint,
-                                        last_prio))
+       last_prio = max(effective_prio(rq), I915_PRIORITY_NORMAL - 1);
+       if (engine->execlists.queue_priority_hint <= last_prio)
                return false;
 
        /*
@@ -1509,8 +1514,17 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
                         * submission.
                         */
                        if (!list_is_last(&last->sched.link,
-                                         &engine->active.requests))
+                                         &engine->active.requests)) {
+                               /*
+                                * Even if ELSP[1] is occupied and not worthy
+                                * of timeslices, our queue might be.
+                                */
+                               if (!execlists->timer.expires &&
+                                   need_timeslice(engine, last))
+                                       mod_timer(&execlists->timer,
+                                                 jiffies + 1);
                                return;
+                       }
 
                        /*
                         * WaIdleLiteRestore:bdw,skl
index 007d6203fe0eb8593b430dfc452a6bc61ed69745..5dc679781a084e0d7e8e029044d32757530a9bda 100644 (file)
@@ -325,7 +325,13 @@ semaphore_queue(struct intel_engine_cs *engine, struct i915_vma *vma, int idx)
        if (IS_ERR(rq))
                goto out_ctx;
 
-       err = emit_semaphore_chain(rq, vma, idx);
+       err = 0;
+       if (rq->engine->emit_init_breadcrumb)
+               err = rq->engine->emit_init_breadcrumb(rq);
+       if (err == 0)
+               err = emit_semaphore_chain(rq, vma, idx);
+       if (err == 0)
+               i915_request_get(rq);
        i915_request_add(rq);
        if (err)
                rq = ERR_PTR(err);
@@ -338,10 +344,10 @@ out_ctx:
 static int
 release_queue(struct intel_engine_cs *engine,
              struct i915_vma *vma,
-             int idx)
+             int idx, int prio)
 {
        struct i915_sched_attr attr = {
-               .priority = I915_USER_PRIORITY(I915_PRIORITY_MAX),
+               .priority = prio,
        };
        struct i915_request *rq;
        u32 *cs;
@@ -362,9 +368,15 @@ release_queue(struct intel_engine_cs *engine,
        *cs++ = 1;
 
        intel_ring_advance(rq, cs);
+
+       i915_request_get(rq);
        i915_request_add(rq);
 
+       local_bh_disable();
        engine->schedule(rq, &attr);
+       local_bh_enable(); /* kick tasklet */
+
+       i915_request_put(rq);
 
        return 0;
 }
@@ -383,7 +395,6 @@ slice_semaphore_queue(struct intel_engine_cs *outer,
        if (IS_ERR(head))
                return PTR_ERR(head);
 
-       i915_request_get(head);
        for_each_engine(engine, outer->gt, id) {
                for (i = 0; i < count; i++) {
                        struct i915_request *rq;
@@ -393,10 +404,12 @@ slice_semaphore_queue(struct intel_engine_cs *outer,
                                err = PTR_ERR(rq);
                                goto out;
                        }
+
+                       i915_request_put(rq);
                }
        }
 
-       err = release_queue(outer, vma, n);
+       err = release_queue(outer, vma, n, INT_MAX);
        if (err)
                goto out;
 
@@ -482,6 +495,150 @@ err_obj:
        return err;
 }
 
+static struct i915_request *nop_request(struct intel_engine_cs *engine)
+{
+       struct i915_request *rq;
+
+       rq = i915_request_create(engine->kernel_context);
+       if (IS_ERR(rq))
+               return rq;
+
+       i915_request_get(rq);
+       i915_request_add(rq);
+
+       return rq;
+}
+
+static void wait_for_submit(struct intel_engine_cs *engine,
+                           struct i915_request *rq)
+{
+       do {
+               cond_resched();
+               intel_engine_flush_submission(engine);
+       } while (!i915_request_is_active(rq));
+}
+
+static int live_timeslice_queue(void *arg)
+{
+       struct intel_gt *gt = arg;
+       struct drm_i915_gem_object *obj;
+       struct intel_engine_cs *engine;
+       enum intel_engine_id id;
+       struct i915_vma *vma;
+       void *vaddr;
+       int err = 0;
+
+       /*
+        * Make sure that even if ELSP[0] and ELSP[1] are filled with
+        * timeslicing between them disabled, we *do* enable timeslicing
+        * if the queue demands it. (Normally, we do not submit if
+        * ELSP[1] is already occupied, so must rely on timeslicing to
+        * eject ELSP[0] in favour of the queue.)
+        */
+
+       obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE);
+       if (IS_ERR(obj))
+               return PTR_ERR(obj);
+
+       vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
+       if (IS_ERR(vma)) {
+               err = PTR_ERR(vma);
+               goto err_obj;
+       }
+
+       vaddr = i915_gem_object_pin_map(obj, I915_MAP_WC);
+       if (IS_ERR(vaddr)) {
+               err = PTR_ERR(vaddr);
+               goto err_obj;
+       }
+
+       err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL);
+       if (err)
+               goto err_map;
+
+       for_each_engine(engine, gt, id) {
+               struct i915_sched_attr attr = {
+                       .priority = I915_USER_PRIORITY(I915_PRIORITY_MAX),
+               };
+               struct i915_request *rq, *nop;
+
+               if (!intel_engine_has_preemption(engine))
+                       continue;
+
+               memset(vaddr, 0, PAGE_SIZE);
+
+               /* ELSP[0]: semaphore wait */
+               rq = semaphore_queue(engine, vma, 0);
+               if (IS_ERR(rq)) {
+                       err = PTR_ERR(rq);
+                       goto err_pin;
+               }
+               engine->schedule(rq, &attr);
+               wait_for_submit(engine, rq);
+
+               /* ELSP[1]: nop request */
+               nop = nop_request(engine);
+               if (IS_ERR(nop)) {
+                       err = PTR_ERR(nop);
+                       i915_request_put(rq);
+                       goto err_pin;
+               }
+               wait_for_submit(engine, nop);
+               i915_request_put(nop);
+
+               GEM_BUG_ON(i915_request_completed(rq));
+               GEM_BUG_ON(execlists_active(&engine->execlists) != rq);
+
+               /* Queue: semaphore signal, matching priority as semaphore */
+               err = release_queue(engine, vma, 1, effective_prio(rq));
+               if (err) {
+                       i915_request_put(rq);
+                       goto err_pin;
+               }
+
+               intel_engine_flush_submission(engine);
+               if (!READ_ONCE(engine->execlists.timer.expires) &&
+                   !i915_request_completed(rq)) {
+                       struct drm_printer p =
+                               drm_info_printer(gt->i915->drm.dev);
+
+                       GEM_TRACE_ERR("%s: Failed to enable timeslicing!\n",
+                                     engine->name);
+                       intel_engine_dump(engine, &p,
+                                         "%s\n", engine->name);
+                       GEM_TRACE_DUMP();
+
+                       memset(vaddr, 0xff, PAGE_SIZE);
+                       err = -EINVAL;
+               }
+
+               /* Timeslice every jiffie, so within 2 we should signal */
+               if (i915_request_wait(rq, 0, 3) < 0) {
+                       struct drm_printer p =
+                               drm_info_printer(gt->i915->drm.dev);
+
+                       pr_err("%s: Failed to timeslice into queue\n",
+                              engine->name);
+                       intel_engine_dump(engine, &p,
+                                         "%s\n", engine->name);
+
+                       memset(vaddr, 0xff, PAGE_SIZE);
+                       err = -EIO;
+               }
+               i915_request_put(rq);
+               if (err)
+                       break;
+       }
+
+err_pin:
+       i915_vma_unpin(vma);
+err_map:
+       i915_gem_object_unpin_map(obj);
+err_obj:
+       i915_gem_object_put(obj);
+       return err;
+}
+
 static int live_busywait_preempt(void *arg)
 {
        struct intel_gt *gt = arg;
@@ -2437,6 +2594,7 @@ int intel_execlists_live_selftests(struct drm_i915_private *i915)
                SUBTEST(live_unlite_switch),
                SUBTEST(live_unlite_preempt),
                SUBTEST(live_timeslice_preempt),
+               SUBTEST(live_timeslice_queue),
                SUBTEST(live_busywait_preempt),
                SUBTEST(live_preempt),
                SUBTEST(live_late_preempt),
index 7b84ebca29011f32df0df4e4746ea77857a67fc6..0ca40f6bf08c7b3b1bccfc18c4d1805555e9ddbc 100644 (file)
@@ -177,9 +177,22 @@ static inline int rq_prio(const struct i915_request *rq)
        return rq->sched.attr.priority | __NO_PREEMPTION;
 }
 
+static inline bool need_preempt(int prio, int active)
+{
+       /*
+        * Allow preemption of low -> normal -> high, but we do
+        * not allow low priority tasks to preempt other low priority
+        * tasks under the impression that latency for low priority
+        * tasks does not matter (as much as background throughput),
+        * so kiss.
+        */
+       return prio >= max(I915_PRIORITY_NORMAL, active);
+}
+
 static void kick_submission(struct intel_engine_cs *engine, int prio)
 {
-       const struct i915_request *inflight = *engine->execlists.active;
+       const struct i915_request *inflight =
+               execlists_active(&engine->execlists);
 
        /*
         * If we are already the currently executing context, don't
@@ -188,7 +201,7 @@ static void kick_submission(struct intel_engine_cs *engine, int prio)
         * tasklet, i.e. we have not change the priority queue
         * sufficiently to oust the running context.
         */
-       if (!inflight || !i915_scheduler_need_preempt(prio, rq_prio(inflight)))
+       if (!inflight || !need_preempt(prio, rq_prio(inflight)))
                return;
 
        tasklet_hi_schedule(&engine->execlists.tasklet);
index 7eefccff39bf64551d717c35c86af51053bd9c50..07d243acf553b2bc9f018be31d8072e5929c9a2b 100644 (file)
@@ -52,22 +52,4 @@ static inline void i915_priolist_free(struct i915_priolist *p)
                __i915_priolist_free(p);
 }
 
-static inline bool i915_scheduler_need_preempt(int prio, int active)
-{
-       /*
-        * Allow preemption of low -> normal -> high, but we do
-        * not allow low priority tasks to preempt other low priority
-        * tasks under the impression that latency for low priority
-        * tasks does not matter (as much as background throughput),
-        * so kiss.
-        *
-        * More naturally we would write
-        *      prio >= max(0, last);
-        * except that we wish to prevent triggering preemption at the same
-        * priority level: the task that is running should remain running
-        * to preserve FIFO ordering of dependencies.
-        */
-       return prio > max(I915_PRIORITY_NORMAL - 1, active);
-}
-
 #endif /* _I915_SCHEDULER_H_ */