{
struct intel_context *ce = container_of(active, typeof(*ce), active);
- CE_TRACE(ce, "retire\n");
+ CE_TRACE(ce, "retire runtime: { total:%lluns, avg:%lluns }\n",
+ intel_context_get_total_runtime_ns(ce),
+ intel_context_get_avg_runtime_ns(ce));
set_bit(CONTEXT_VALID_BIT, &ce->flags);
if (ce->state)
ce->sseu = engine->sseu;
ce->ring = __intel_context_ring_size(SZ_4K);
+ ewma_runtime_init(&ce->runtime.avg);
+
ce->vm = i915_vm_get(engine->gt->vm);
INIT_LIST_HEAD(&ce->signal_link);
#include <linux/types.h>
#include "i915_active.h"
+#include "i915_drv.h"
#include "intel_context_types.h"
#include "intel_engine_types.h"
#include "intel_ring_types.h"
clear_bit(CONTEXT_NOPREEMPT, &ce->flags);
}
+static inline u64 intel_context_get_total_runtime_ns(struct intel_context *ce)
+{
+ const u32 period =
+ RUNTIME_INFO(ce->engine->i915)->cs_timestamp_period_ns;
+
+ return READ_ONCE(ce->runtime.total) * period;
+}
+
+static inline u64 intel_context_get_avg_runtime_ns(struct intel_context *ce)
+{
+ const u32 period =
+ RUNTIME_INFO(ce->engine->i915)->cs_timestamp_period_ns;
+
+ return mul_u32_u32(ewma_runtime_read(&ce->runtime.avg), period);
+}
+
#endif /* __INTEL_CONTEXT_H__ */
#ifndef __INTEL_CONTEXT_TYPES__
#define __INTEL_CONTEXT_TYPES__
+#include <linux/average.h>
#include <linux/kref.h>
#include <linux/list.h>
#include <linux/mutex.h>
#define CONTEXT_REDZONE POISON_INUSE
+DECLARE_EWMA(runtime, 3, 8);
+
struct i915_gem_context;
struct i915_vma;
struct intel_context;
u64 lrc_desc;
u32 tag; /* cookie passed to HW to track this context on submission */
+ /* Time on GPU as tracked by the hw. */
+ struct {
+ struct ewma_runtime avg;
+ u64 total;
+ u32 last;
+ I915_SELFTEST_DECLARE(u32 num_underflow);
+ I915_SELFTEST_DECLARE(u32 max_underflow);
+ } runtime;
+
unsigned int active_count; /* protected by timeline->mutex */
atomic_t pin_count;
ce->lrc_desc |= CTX_DESC_FORCE_RESTORE;
}
+static u32 intel_context_get_runtime(const struct intel_context *ce)
+{
+ /*
+ * We can use either ppHWSP[16] which is recorded before the context
+ * switch (and so excludes the cost of context switches) or use the
+ * value from the context image itself, which is saved/restored earlier
+ * and so includes the cost of the save.
+ */
+ return READ_ONCE(ce->lrc_reg_state[CTX_TIMESTAMP]);
+}
+
+static void intel_context_update_runtime(struct intel_context *ce)
+{
+ u32 old;
+ s32 dt;
+
+ if (intel_context_is_barrier(ce))
+ return;
+
+ old = ce->runtime.last;
+ ce->runtime.last = intel_context_get_runtime(ce);
+ dt = ce->runtime.last - old;
+
+ if (unlikely(dt <= 0)) {
+ CE_TRACE(ce, "runtime underflow: last=%u, new=%u, delta=%d\n",
+ old, ce->runtime.last, dt);
+ I915_SELFTEST_ONLY(ce->runtime.num_underflow += dt < 0);
+ I915_SELFTEST_ONLY(ce->runtime.max_underflow =
+ max_t(u32, ce->runtime.max_underflow, -dt));
+ return;
+ }
+
+ ewma_runtime_add(&ce->runtime.avg, dt);
+ ce->runtime.total += dt;
+}
+
static inline struct intel_engine_cs *
__execlists_schedule_in(struct i915_request *rq)
{
i915_request_completed(rq))
intel_engine_add_retire(engine, ce->timeline);
+ intel_context_update_runtime(ce);
intel_engine_context_out(engine);
execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_OUT);
intel_gt_pm_put_async(engine->gt);
inhibit = false;
}
- /* The second page of the context object contains some fields which must
- * be set up prior to the first execution. */
+ /* Clear the ppHWSP (inc. per-context counters) */
+ memset(vaddr, 0, PAGE_SIZE);
+
+ /*
+ * The second page of the context object contains some registers which
+ * must be set up prior to the first execution.
+ */
execlists_init_reg_state(vaddr + LRC_STATE_PN * PAGE_SIZE,
ce, engine, ring, inhibit);
#define CTX_RING_CTL (0x0a + 1)
#define CTX_BB_STATE (0x10 + 1)
#define CTX_BB_PER_CTX_PTR (0x18 + 1)
+#define CTX_TIMESTAMP (0x22 + 1)
#define CTX_PDP3_UDW (0x24 + 1)
#define CTX_PDP3_LDW (0x26 + 1)
#define CTX_PDP2_UDW (0x28 + 1)
CTX_BB_STATE - 1,
"BB_STATE"
},
+ {
+ i915_mmio_reg_offset(RING_CTX_TIMESTAMP(engine->mmio_base)),
+ CTX_TIMESTAMP - 1,
+ "RING_CTX_TIMESTAMP"
+ },
{ },
}, *t;
u32 *hw;
return err;
}
+static int __live_pphwsp_runtime(struct intel_engine_cs *engine)
+{
+ struct intel_context *ce;
+ struct i915_request *rq;
+ IGT_TIMEOUT(end_time);
+ int err;
+
+ ce = intel_context_create(engine);
+ if (IS_ERR(ce))
+ return PTR_ERR(ce);
+
+ ce->runtime.num_underflow = 0;
+ ce->runtime.max_underflow = 0;
+
+ do {
+ unsigned int loop = 1024;
+
+ while (loop) {
+ rq = intel_context_create_request(ce);
+ if (IS_ERR(rq)) {
+ err = PTR_ERR(rq);
+ goto err_rq;
+ }
+
+ if (--loop == 0)
+ i915_request_get(rq);
+
+ i915_request_add(rq);
+ }
+
+ if (__igt_timeout(end_time, NULL))
+ break;
+
+ i915_request_put(rq);
+ } while (1);
+
+ err = i915_request_wait(rq, 0, HZ / 5);
+ if (err < 0) {
+ pr_err("%s: request not completed!\n", engine->name);
+ goto err_wait;
+ }
+
+ igt_flush_test(engine->i915);
+
+ pr_info("%s: pphwsp runtime %lluns, average %lluns\n",
+ engine->name,
+ intel_context_get_total_runtime_ns(ce),
+ intel_context_get_avg_runtime_ns(ce));
+
+ err = 0;
+ if (ce->runtime.num_underflow) {
+ pr_err("%s: pphwsp underflow %u time(s), max %u cycles!\n",
+ engine->name,
+ ce->runtime.num_underflow,
+ ce->runtime.max_underflow);
+ GEM_TRACE_DUMP();
+ err = -EOVERFLOW;
+ }
+
+err_wait:
+ i915_request_put(rq);
+err_rq:
+ intel_context_put(ce);
+ return err;
+}
+
+static int live_pphwsp_runtime(void *arg)
+{
+ struct intel_gt *gt = arg;
+ struct intel_engine_cs *engine;
+ enum intel_engine_id id;
+ int err = 0;
+
+ /*
+ * Check that cumulative context runtime as stored in the pphwsp[16]
+ * is monotonic.
+ */
+
+ for_each_engine(engine, gt, id) {
+ err = __live_pphwsp_runtime(engine);
+ if (err)
+ break;
+ }
+
+ if (igt_flush_test(gt->i915))
+ err = -EIO;
+
+ return err;
+}
+
int intel_lrc_live_selftests(struct drm_i915_private *i915)
{
static const struct i915_subtest tests[] = {
SUBTEST(live_lrc_fixed),
SUBTEST(live_lrc_state),
SUBTEST(live_gpr_clear),
+ SUBTEST(live_pphwsp_runtime),
};
if (!HAS_LOGICAL_RING_CONTEXTS(i915))
const char *header,
const struct i915_gem_context_coredump *ctx)
{
- err_printf(m, "%s%s[%d] prio %d, guilty %d active %d\n",
+ const u32 period = RUNTIME_INFO(m->i915)->cs_timestamp_period_ns;
+
+ err_printf(m, "%s%s[%d] prio %d, guilty %d active %d, runtime total %lluns, avg %lluns\n",
header, ctx->comm, ctx->pid, ctx->sched_attr.priority,
- ctx->guilty, ctx->active);
+ ctx->guilty, ctx->active,
+ ctx->total_runtime * period,
+ mul_u32_u32(ctx->avg_runtime, period));
}
static struct i915_vma_coredump *
e->guilty = atomic_read(&ctx->guilty_count);
e->active = atomic_read(&ctx->active_count);
+ e->total_runtime = rq->context->runtime.total;
+ e->avg_runtime = ewma_runtime_read(&rq->context->runtime.avg);
+
simulated = i915_gem_context_no_error_capture(ctx);
i915_gem_context_put(ctx);
struct i915_gem_context_coredump {
char comm[TASK_COMM_LEN];
+
+ u64 total_runtime;
+ u32 avg_runtime;
+
pid_t pid;
int active;
int guilty;
/* Initialize command stream timestamp frequency */
runtime->cs_timestamp_frequency_khz = read_timestamp_frequency(dev_priv);
+ runtime->cs_timestamp_period_ns =
+ div_u64(1e6, runtime->cs_timestamp_frequency_khz);
+ drm_dbg(&dev_priv->drm,
+ "CS timestamp wraparound in %lldms\n",
+ div_u64(mul_u32_u32(runtime->cs_timestamp_period_ns, S32_MAX),
+ USEC_PER_SEC));
}
void intel_driver_caps_print(const struct intel_driver_caps *caps,
struct sseu_dev_info sseu;
u32 cs_timestamp_frequency_khz;
+ u32 cs_timestamp_period_ns;
/* Media engine access to SFC per instance */
u8 vdbox_sfc_access;