drm/i915: Track hw reported context runtime

author Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Sun, 16 Feb 2020 13:36:20 +0000 (13:36 +0000)

committer Chris Wilson <chris@chris-wilson.co.uk>

Sun, 16 Feb 2020 15:16:22 +0000 (15:16 +0000)
author Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Sun, 16 Feb 2020 13:36:20 +0000 (13:36 +0000)
committer Chris Wilson <chris@chris-wilson.co.uk>
Sun, 16 Feb 2020 15:16:22 +0000 (15:16 +0000)
diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c

index e4f89341d17cfad2d7b6450ff0ba421266a51787..8bb444cda14fa2c21c6be9a9e0422539a63f7109 100644 (file)
--- a/drivers/gpu/drm/i915/gt/intel_context.c
+++ b/drivers/gpu/drm/i915/gt/intel_context.c
@@ -220,7 +220,9 @@ static void __intel_context_retire(struct i915_active *active)
  {
         struct intel_context *ce = container_of(active, typeof(*ce), active);
  
-       CE_TRACE(ce, "retire\n");
+       CE_TRACE(ce, "retire runtime: { total:%lluns, avg:%lluns }\n",
+                intel_context_get_total_runtime_ns(ce),
+                intel_context_get_avg_runtime_ns(ce));
  
         set_bit(CONTEXT_VALID_BIT, &ce->flags);
         if (ce->state)
@@ -281,6 +283,8 @@ intel_context_init(struct intel_context *ce,
         ce->sseu = engine->sseu;
         ce->ring = __intel_context_ring_size(SZ_4K);
  
+       ewma_runtime_init(&ce->runtime.avg);
+
         ce->vm = i915_vm_get(engine->gt->vm);
  
         INIT_LIST_HEAD(&ce->signal_link);
diff --git a/drivers/gpu/drm/i915/gt/intel_context.h b/drivers/gpu/drm/i915/gt/intel_context.h

index 604d5cfc46bab20af91b57bbe53cdd7f8838595c..18efad2551245a915917a1bcb893ba3a9d1a4099 100644 (file)
--- a/drivers/gpu/drm/i915/gt/intel_context.h
+++ b/drivers/gpu/drm/i915/gt/intel_context.h
@@ -12,6 +12,7 @@
  #include <linux/types.h>
  
  #include "i915_active.h"
+#include "i915_drv.h"
  #include "intel_context_types.h"
  #include "intel_engine_types.h"
  #include "intel_ring_types.h"
@@ -227,4 +228,20 @@ intel_context_clear_nopreempt(struct intel_context *ce)
         clear_bit(CONTEXT_NOPREEMPT, &ce->flags);
  }
  
+static inline u64 intel_context_get_total_runtime_ns(struct intel_context *ce)
+{
+       const u32 period =
+               RUNTIME_INFO(ce->engine->i915)->cs_timestamp_period_ns;
+
+       return READ_ONCE(ce->runtime.total) * period;
+}
+
+static inline u64 intel_context_get_avg_runtime_ns(struct intel_context *ce)
+{
+       const u32 period =
+               RUNTIME_INFO(ce->engine->i915)->cs_timestamp_period_ns;
+
+       return mul_u32_u32(ewma_runtime_read(&ce->runtime.avg), period);
+}
+
  #endif /* __INTEL_CONTEXT_H__ */
diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h

index ca1420fb8b5339594b2b603db05525359d80b938..11278343b9b55709e0e581e19c44baceb64804d7 100644 (file)
--- a/drivers/gpu/drm/i915/gt/intel_context_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_context_types.h
@@ -7,6 +7,7 @@
  #ifndef __INTEL_CONTEXT_TYPES__
  #define __INTEL_CONTEXT_TYPES__
  
+#include <linux/average.h>
  #include <linux/kref.h>
  #include <linux/list.h>
  #include <linux/mutex.h>
@@ -19,6 +20,8 @@
  
  #define CONTEXT_REDZONE POISON_INUSE
  
+DECLARE_EWMA(runtime, 3, 8);
+
  struct i915_gem_context;
  struct i915_vma;
  struct intel_context;
@@ -68,6 +71,15 @@ struct intel_context {
         u64 lrc_desc;
         u32 tag; /* cookie passed to HW to track this context on submission */
  
+       /* Time on GPU as tracked by the hw. */
+       struct {
+               struct ewma_runtime avg;
+               u64 total;
+               u32 last;
+               I915_SELFTEST_DECLARE(u32 num_underflow);
+               I915_SELFTEST_DECLARE(u32 max_underflow);
+       } runtime;
+
         unsigned int active_count; /* protected by timeline->mutex */
  
         atomic_t pin_count;
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c

index c3d7727021dba1c28c659b3450c2cd46a35052b7..78e854440949e3d5e4b0dbdcb059e58a8e2cbbe0 100644 (file)
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -1195,6 +1195,42 @@ static void reset_active(struct i915_request *rq,
         ce->lrc_desc |= CTX_DESC_FORCE_RESTORE;
  }
  
+static u32 intel_context_get_runtime(const struct intel_context *ce)
+{
+       /*
+        * We can use either ppHWSP[16] which is recorded before the context
+        * switch (and so excludes the cost of context switches) or use the
+        * value from the context image itself, which is saved/restored earlier
+        * and so includes the cost of the save.
+        */
+       return READ_ONCE(ce->lrc_reg_state[CTX_TIMESTAMP]);
+}
+
+static void intel_context_update_runtime(struct intel_context *ce)
+{
+       u32 old;
+       s32 dt;
+
+       if (intel_context_is_barrier(ce))
+               return;
+
+       old = ce->runtime.last;
+       ce->runtime.last = intel_context_get_runtime(ce);
+       dt = ce->runtime.last - old;
+
+       if (unlikely(dt <= 0)) {
+               CE_TRACE(ce, "runtime underflow: last=%u, new=%u, delta=%d\n",
+                        old, ce->runtime.last, dt);
+               I915_SELFTEST_ONLY(ce->runtime.num_underflow += dt < 0);
+               I915_SELFTEST_ONLY(ce->runtime.max_underflow =
+                                  max_t(u32, ce->runtime.max_underflow, -dt));
+               return;
+       }
+
+       ewma_runtime_add(&ce->runtime.avg, dt);
+       ce->runtime.total += dt;
+}
+
  static inline struct intel_engine_cs *
  __execlists_schedule_in(struct i915_request *rq)
  {
@@ -1278,6 +1314,7 @@ __execlists_schedule_out(struct i915_request *rq,
             i915_request_completed(rq))
                 intel_engine_add_retire(engine, ce->timeline);
  
+       intel_context_update_runtime(ce);
         intel_engine_context_out(engine);
         execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_OUT);
         intel_gt_pm_put_async(engine->gt);
@@ -4607,8 +4644,13 @@ populate_lr_context(struct intel_context *ce,
                 inhibit = false;
         }
  
-       /* The second page of the context object contains some fields which must
-        * be set up prior to the first execution. */
+       /* Clear the ppHWSP (inc. per-context counters) */
+       memset(vaddr, 0, PAGE_SIZE);
+
+       /*
+        * The second page of the context object contains some registers which
+        * must be set up prior to the first execution.
+        */
         execlists_init_reg_state(vaddr + LRC_STATE_PN * PAGE_SIZE,
                                  ce, engine, ring, inhibit);
  
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc_reg.h b/drivers/gpu/drm/i915/gt/intel_lrc_reg.h

index 08a3be65f70093e250e296124616759d6483bcf0..d39b72590e40a0be5890f76ecf9ea494a3f46eec 100644 (file)
--- a/drivers/gpu/drm/i915/gt/intel_lrc_reg.h
+++ b/drivers/gpu/drm/i915/gt/intel_lrc_reg.h
@@ -17,6 +17,7 @@
  #define CTX_RING_CTL                   (0x0a + 1)
  #define CTX_BB_STATE                   (0x10 + 1)
  #define CTX_BB_PER_CTX_PTR             (0x18 + 1)
+#define CTX_TIMESTAMP                  (0x22 + 1)
  #define CTX_PDP3_UDW                   (0x24 + 1)
  #define CTX_PDP3_LDW                   (0x26 + 1)
  #define CTX_PDP2_UDW                   (0x28 + 1)
diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c

index 64761e6198767abd4304ec121233e5780b295702..40c53cc1c7c0e74765505bd0f78b59d0dca5d7b5 100644 (file)
--- a/drivers/gpu/drm/i915/gt/selftest_lrc.c
+++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c
@@ -4151,6 +4151,11 @@ static int live_lrc_fixed(void *arg)
                                 CTX_BB_STATE - 1,
                                 "BB_STATE"
                         },
+                       {
+                               i915_mmio_reg_offset(RING_CTX_TIMESTAMP(engine->mmio_base)),
+                               CTX_TIMESTAMP - 1,
+                               "RING_CTX_TIMESTAMP"
+                       },
                         { },
                 }, *t;
                 u32 *hw;
@@ -4450,6 +4455,96 @@ static int live_gpr_clear(void *arg)
         return err;
  }
  
+static int __live_pphwsp_runtime(struct intel_engine_cs *engine)
+{
+       struct intel_context *ce;
+       struct i915_request *rq;
+       IGT_TIMEOUT(end_time);
+       int err;
+
+       ce = intel_context_create(engine);
+       if (IS_ERR(ce))
+               return PTR_ERR(ce);
+
+       ce->runtime.num_underflow = 0;
+       ce->runtime.max_underflow = 0;
+
+       do {
+               unsigned int loop = 1024;
+
+               while (loop) {
+                       rq = intel_context_create_request(ce);
+                       if (IS_ERR(rq)) {
+                               err = PTR_ERR(rq);
+                               goto err_rq;
+                       }
+
+                       if (--loop == 0)
+                               i915_request_get(rq);
+
+                       i915_request_add(rq);
+               }
+
+               if (__igt_timeout(end_time, NULL))
+                       break;
+
+               i915_request_put(rq);
+       } while (1);
+
+       err = i915_request_wait(rq, 0, HZ / 5);
+       if (err < 0) {
+               pr_err("%s: request not completed!\n", engine->name);
+               goto err_wait;
+       }
+
+       igt_flush_test(engine->i915);
+
+       pr_info("%s: pphwsp runtime %lluns, average %lluns\n",
+               engine->name,
+               intel_context_get_total_runtime_ns(ce),
+               intel_context_get_avg_runtime_ns(ce));
+
+       err = 0;
+       if (ce->runtime.num_underflow) {
+               pr_err("%s: pphwsp underflow %u time(s), max %u cycles!\n",
+                      engine->name,
+                      ce->runtime.num_underflow,
+                      ce->runtime.max_underflow);
+               GEM_TRACE_DUMP();
+               err = -EOVERFLOW;
+       }
+
+err_wait:
+       i915_request_put(rq);
+err_rq:
+       intel_context_put(ce);
+       return err;
+}
+
+static int live_pphwsp_runtime(void *arg)
+{
+       struct intel_gt *gt = arg;
+       struct intel_engine_cs *engine;
+       enum intel_engine_id id;
+       int err = 0;
+
+       /*
+        * Check that cumulative context runtime as stored in the pphwsp[16]
+        * is monotonic.
+        */
+
+       for_each_engine(engine, gt, id) {
+               err = __live_pphwsp_runtime(engine);
+               if (err)
+                       break;
+       }
+
+       if (igt_flush_test(gt->i915))
+               err = -EIO;
+
+       return err;
+}
+
  int intel_lrc_live_selftests(struct drm_i915_private *i915)
  {
         static const struct i915_subtest tests[] = {
@@ -4457,6 +4552,7 @@ int intel_lrc_live_selftests(struct drm_i915_private *i915)
                 SUBTEST(live_lrc_fixed),
                 SUBTEST(live_lrc_state),
                 SUBTEST(live_gpr_clear),
+               SUBTEST(live_pphwsp_runtime),
         };
  
         if (!HAS_LOGICAL_RING_CONTEXTS(i915))
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c

index b2ed977ed971103a1282f9bf8ebca697095b70ef..3052c4eaf9f64c9f7884526d1a091cff2ceec12c 100644 (file)
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -481,9 +481,13 @@ static void error_print_context(struct drm_i915_error_state_buf *m,
                                 const char *header,
                                 const struct i915_gem_context_coredump *ctx)
  {
-       err_printf(m, "%s%s[%d] prio %d, guilty %d active %d\n",
+       const u32 period = RUNTIME_INFO(m->i915)->cs_timestamp_period_ns;
+
+       err_printf(m, "%s%s[%d] prio %d, guilty %d active %d, runtime total %lluns, avg %lluns\n",
                    header, ctx->comm, ctx->pid, ctx->sched_attr.priority,
-                  ctx->guilty, ctx->active);
+                  ctx->guilty, ctx->active,
+                  ctx->total_runtime * period,
+                  mul_u32_u32(ctx->avg_runtime, period));
  }
  
  static struct i915_vma_coredump *
@@ -1260,6 +1264,9 @@ static bool record_context(struct i915_gem_context_coredump *e,
         e->guilty = atomic_read(&ctx->guilty_count);
         e->active = atomic_read(&ctx->active_count);
  
+       e->total_runtime = rq->context->runtime.total;
+       e->avg_runtime = ewma_runtime_read(&rq->context->runtime.avg);
+
         simulated = i915_gem_context_no_error_capture(ctx);
  
         i915_gem_context_put(ctx);
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.h b/drivers/gpu/drm/i915/i915_gpu_error.h

index b35bc9edd7332242271842e0e45918fb03ee912d..0d1f6c8ff3556fa3d8923692741272045141358e 100644 (file)
--- a/drivers/gpu/drm/i915/i915_gpu_error.h
+++ b/drivers/gpu/drm/i915/i915_gpu_error.h
@@ -88,6 +88,10 @@ struct intel_engine_coredump {
  
         struct i915_gem_context_coredump {
                 char comm[TASK_COMM_LEN];
+
+               u64 total_runtime;
+               u32 avg_runtime;
+
                 pid_t pid;
                 int active;
                 int guilty;
diff --git a/drivers/gpu/drm/i915/intel_device_info.c b/drivers/gpu/drm/i915/intel_device_info.c

index fcdacd6d4aa5531d0b0767b91501156d48e429b3..a97437fac88455caa4b918e8dc6e016dffe53f68 100644 (file)
--- a/drivers/gpu/drm/i915/intel_device_info.c
+++ b/drivers/gpu/drm/i915/intel_device_info.c
@@ -1045,6 +1045,12 @@ void intel_device_info_runtime_init(struct drm_i915_private *dev_priv)
  
         /* Initialize command stream timestamp frequency */
         runtime->cs_timestamp_frequency_khz = read_timestamp_frequency(dev_priv);
+       runtime->cs_timestamp_period_ns =
+               div_u64(1e6, runtime->cs_timestamp_frequency_khz);
+       drm_dbg(&dev_priv->drm,
+               "CS timestamp wraparound in %lldms\n",
+               div_u64(mul_u32_u32(runtime->cs_timestamp_period_ns, S32_MAX),
+                       USEC_PER_SEC));
  }
  
  void intel_driver_caps_print(const struct intel_driver_caps *caps,
diff --git a/drivers/gpu/drm/i915/intel_device_info.h b/drivers/gpu/drm/i915/intel_device_info.h

index 7d4d122d2182f1ea0190bfd164473a162c4d3afb..f8bfa26388c13f38c7c4895ffba02c85312aec55 100644 (file)
--- a/drivers/gpu/drm/i915/intel_device_info.h
+++ b/drivers/gpu/drm/i915/intel_device_info.h
@@ -217,6 +217,7 @@ struct intel_runtime_info {
         struct sseu_dev_info sseu;
  
         u32 cs_timestamp_frequency_khz;
+       u32 cs_timestamp_period_ns;
  
         /* Media engine access to SFC per instance */
         u8 vdbox_sfc_access;
author	Tvrtko Ursulin <tvrtko.ursulin@intel.com>
	Sun, 16 Feb 2020 13:36:20 +0000 (13:36 +0000)
committer	Chris Wilson <chris@chris-wilson.co.uk>
	Sun, 16 Feb 2020 15:16:22 +0000 (15:16 +0000)
drivers/gpu/drm/i915/gt/intel_context.c		patch \| blob \| blame \| history
drivers/gpu/drm/i915/gt/intel_context.h		patch \| blob \| blame \| history
drivers/gpu/drm/i915/gt/intel_context_types.h		patch \| blob \| blame \| history
drivers/gpu/drm/i915/gt/intel_lrc.c		patch \| blob \| blame \| history
drivers/gpu/drm/i915/gt/intel_lrc_reg.h		patch \| blob \| blame \| history
drivers/gpu/drm/i915/gt/selftest_lrc.c		patch \| blob \| blame \| history
drivers/gpu/drm/i915/i915_gpu_error.c		patch \| blob \| blame \| history
drivers/gpu/drm/i915/i915_gpu_error.h		patch \| blob \| blame \| history
drivers/gpu/drm/i915/intel_device_info.c		patch \| blob \| blame \| history
drivers/gpu/drm/i915/intel_device_info.h		patch \| blob \| blame \| history