drm/i915/guc: Limit scheduling properties to avoid overflow
authorJohn Harrison <John.C.Harrison@Intel.com>
Thu, 6 Oct 2022 21:38:10 +0000 (14:38 -0700)
committerJohn Harrison <John.C.Harrison@Intel.com>
Mon, 24 Oct 2022 19:11:59 +0000 (12:11 -0700)
GuC converts the pre-emption timeout and timeslice quantum values into
clock ticks internally. That significantly reduces the point of 32bit
overflow. On current platforms, worst case scenario is approximately
110 seconds. Rather than allowing the user to set higher values and
then get confused by early timeouts, add limits when setting these
values.

v2: Add helper functions for clamping (review feedback from Tvrtko).
v3: Add a bunch of BUG_ON range checks in addition to the checks
already in the clamping functions (Tvrtko)

Signed-off-by: John Harrison <John.C.Harrison@Intel.com>
Reviewed-by: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
Acked-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20221006213813.1563435-2-John.C.Harrison@Intel.com
drivers/gpu/drm/i915/gt/intel_engine.h
drivers/gpu/drm/i915/gt/intel_engine_cs.c
drivers/gpu/drm/i915/gt/sysfs_engines.c
drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h
drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c

index 04e435bce79bdfc731b0f4bf834e3bd06381e2da..cbc8b857d5f7a0948897b12e154564395a16f282 100644 (file)
@@ -348,4 +348,10 @@ intel_engine_get_hung_context(struct intel_engine_cs *engine)
        return engine->hung_ce;
 }
 
+u64 intel_clamp_heartbeat_interval_ms(struct intel_engine_cs *engine, u64 value);
+u64 intel_clamp_max_busywait_duration_ns(struct intel_engine_cs *engine, u64 value);
+u64 intel_clamp_preempt_timeout_ms(struct intel_engine_cs *engine, u64 value);
+u64 intel_clamp_stop_timeout_ms(struct intel_engine_cs *engine, u64 value);
+u64 intel_clamp_timeslice_duration_ms(struct intel_engine_cs *engine, u64 value);
+
 #endif /* _INTEL_RINGBUFFER_H_ */
index c408bac3c53363e7f960b2ed847bc34bc8058652..b1c16aac2a9b33b986d44944a5d9bf8609a897ad 100644 (file)
@@ -512,6 +512,26 @@ static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id,
                engine->flags |= I915_ENGINE_HAS_EU_PRIORITY;
        }
 
+       /* Cap properties according to any system limits */
+#define CLAMP_PROP(field) \
+       do { \
+               u64 clamp = intel_clamp_##field(engine, engine->props.field); \
+               if (clamp != engine->props.field) { \
+                       drm_notice(&engine->i915->drm, \
+                                  "Warning, clamping %s to %lld to prevent overflow\n", \
+                                  #field, clamp); \
+                       engine->props.field = clamp; \
+               } \
+       } while (0)
+
+       CLAMP_PROP(heartbeat_interval_ms);
+       CLAMP_PROP(max_busywait_duration_ns);
+       CLAMP_PROP(preempt_timeout_ms);
+       CLAMP_PROP(stop_timeout_ms);
+       CLAMP_PROP(timeslice_duration_ms);
+
+#undef CLAMP_PROP
+
        engine->defaults = engine->props; /* never to change again */
 
        engine->context_size = intel_engine_context_size(gt, engine->class);
@@ -534,6 +554,55 @@ static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id,
        return 0;
 }
 
+u64 intel_clamp_heartbeat_interval_ms(struct intel_engine_cs *engine, u64 value)
+{
+       value = min_t(u64, value, jiffies_to_msecs(MAX_SCHEDULE_TIMEOUT));
+
+       return value;
+}
+
+u64 intel_clamp_max_busywait_duration_ns(struct intel_engine_cs *engine, u64 value)
+{
+       value = min(value, jiffies_to_nsecs(2));
+
+       return value;
+}
+
+u64 intel_clamp_preempt_timeout_ms(struct intel_engine_cs *engine, u64 value)
+{
+       /*
+        * NB: The GuC API only supports 32bit values. However, the limit is further
+        * reduced due to internal calculations which would otherwise overflow.
+        */
+       if (intel_guc_submission_is_wanted(&engine->gt->uc.guc))
+               value = min_t(u64, value, guc_policy_max_preempt_timeout_ms());
+
+       value = min_t(u64, value, jiffies_to_msecs(MAX_SCHEDULE_TIMEOUT));
+
+       return value;
+}
+
+u64 intel_clamp_stop_timeout_ms(struct intel_engine_cs *engine, u64 value)
+{
+       value = min_t(u64, value, jiffies_to_msecs(MAX_SCHEDULE_TIMEOUT));
+
+       return value;
+}
+
+u64 intel_clamp_timeslice_duration_ms(struct intel_engine_cs *engine, u64 value)
+{
+       /*
+        * NB: The GuC API only supports 32bit values. However, the limit is further
+        * reduced due to internal calculations which would otherwise overflow.
+        */
+       if (intel_guc_submission_is_wanted(&engine->gt->uc.guc))
+               value = min_t(u64, value, guc_policy_max_exec_quantum_ms());
+
+       value = min_t(u64, value, jiffies_to_msecs(MAX_SCHEDULE_TIMEOUT));
+
+       return value;
+}
+
 static void __setup_engine_capabilities(struct intel_engine_cs *engine)
 {
        struct drm_i915_private *i915 = engine->i915;
index 9670310562029c005480528a1b996e39413ef6f6..f2d9858d827c23bd3a5bb16a4feaf8d1c9c7b4b8 100644 (file)
@@ -144,7 +144,7 @@ max_spin_store(struct kobject *kobj, struct kobj_attribute *attr,
               const char *buf, size_t count)
 {
        struct intel_engine_cs *engine = kobj_to_engine(kobj);
-       unsigned long long duration;
+       unsigned long long duration, clamped;
        int err;
 
        /*
@@ -168,7 +168,8 @@ max_spin_store(struct kobject *kobj, struct kobj_attribute *attr,
        if (err)
                return err;
 
-       if (duration > jiffies_to_nsecs(2))
+       clamped = intel_clamp_max_busywait_duration_ns(engine, duration);
+       if (duration != clamped)
                return -EINVAL;
 
        WRITE_ONCE(engine->props.max_busywait_duration_ns, duration);
@@ -203,7 +204,7 @@ timeslice_store(struct kobject *kobj, struct kobj_attribute *attr,
                const char *buf, size_t count)
 {
        struct intel_engine_cs *engine = kobj_to_engine(kobj);
-       unsigned long long duration;
+       unsigned long long duration, clamped;
        int err;
 
        /*
@@ -218,7 +219,8 @@ timeslice_store(struct kobject *kobj, struct kobj_attribute *attr,
        if (err)
                return err;
 
-       if (duration > jiffies_to_msecs(MAX_SCHEDULE_TIMEOUT))
+       clamped = intel_clamp_timeslice_duration_ms(engine, duration);
+       if (duration != clamped)
                return -EINVAL;
 
        WRITE_ONCE(engine->props.timeslice_duration_ms, duration);
@@ -256,7 +258,7 @@ stop_store(struct kobject *kobj, struct kobj_attribute *attr,
           const char *buf, size_t count)
 {
        struct intel_engine_cs *engine = kobj_to_engine(kobj);
-       unsigned long long duration;
+       unsigned long long duration, clamped;
        int err;
 
        /*
@@ -272,7 +274,8 @@ stop_store(struct kobject *kobj, struct kobj_attribute *attr,
        if (err)
                return err;
 
-       if (duration > jiffies_to_msecs(MAX_SCHEDULE_TIMEOUT))
+       clamped = intel_clamp_stop_timeout_ms(engine, duration);
+       if (duration != clamped)
                return -EINVAL;
 
        WRITE_ONCE(engine->props.stop_timeout_ms, duration);
@@ -306,7 +309,7 @@ preempt_timeout_store(struct kobject *kobj, struct kobj_attribute *attr,
                      const char *buf, size_t count)
 {
        struct intel_engine_cs *engine = kobj_to_engine(kobj);
-       unsigned long long timeout;
+       unsigned long long timeout, clamped;
        int err;
 
        /*
@@ -322,7 +325,8 @@ preempt_timeout_store(struct kobject *kobj, struct kobj_attribute *attr,
        if (err)
                return err;
 
-       if (timeout > jiffies_to_msecs(MAX_SCHEDULE_TIMEOUT))
+       clamped = intel_clamp_preempt_timeout_ms(engine, timeout);
+       if (timeout != clamped)
                return -EINVAL;
 
        WRITE_ONCE(engine->props.preempt_timeout_ms, timeout);
@@ -362,7 +366,7 @@ heartbeat_store(struct kobject *kobj, struct kobj_attribute *attr,
                const char *buf, size_t count)
 {
        struct intel_engine_cs *engine = kobj_to_engine(kobj);
-       unsigned long long delay;
+       unsigned long long delay, clamped;
        int err;
 
        /*
@@ -379,7 +383,8 @@ heartbeat_store(struct kobject *kobj, struct kobj_attribute *attr,
        if (err)
                return err;
 
-       if (delay >= jiffies_to_msecs(MAX_SCHEDULE_TIMEOUT))
+       clamped = intel_clamp_heartbeat_interval_ms(engine, delay);
+       if (delay != clamped)
                return -EINVAL;
 
        err = intel_engine_set_heartbeat(engine, delay);
index e7a7fb450f442af2565a07efb4a14996f8907e19..968ebd79dce70edd30c87117f61767a1d7d099cb 100644 (file)
@@ -327,6 +327,27 @@ struct guc_update_scheduling_policy {
 
 #define GLOBAL_POLICY_DEFAULT_DPC_PROMOTE_TIME_US 500000
 
+/*
+ * GuC converts the timeout to clock ticks internally. Different platforms have
+ * different GuC clocks. Thus, the maximum value before overflow is platform
+ * dependent. Current worst case scenario is about 110s. So, the spec says to
+ * limit to 100s to be safe.
+ */
+#define GUC_POLICY_MAX_EXEC_QUANTUM_US         (100 * 1000 * 1000UL)
+#define GUC_POLICY_MAX_PREEMPT_TIMEOUT_US      (100 * 1000 * 1000UL)
+
+static inline u32 guc_policy_max_exec_quantum_ms(void)
+{
+       BUILD_BUG_ON(GUC_POLICY_MAX_EXEC_QUANTUM_US >= UINT_MAX);
+       return GUC_POLICY_MAX_EXEC_QUANTUM_US / 1000;
+}
+
+static inline u32 guc_policy_max_preempt_timeout_ms(void)
+{
+       BUILD_BUG_ON(GUC_POLICY_MAX_PREEMPT_TIMEOUT_US >= UINT_MAX);
+       return GUC_POLICY_MAX_PREEMPT_TIMEOUT_US / 1000;
+}
+
 struct guc_policies {
        u32 submission_queue_depth[GUC_MAX_ENGINE_CLASSES];
        /* In micro seconds. How much time to allow before DPC processing is
index 693b07a977893d9cf7496a44a582bfc94557753d..c433d35ae41ae4226a5e360fea292e553379516d 100644 (file)
@@ -2430,6 +2430,10 @@ static int guc_context_policy_init_v70(struct intel_context *ce, bool loop)
        int ret;
 
        /* NB: For both of these, zero means disabled. */
+       GEM_BUG_ON(overflows_type(engine->props.timeslice_duration_ms * 1000,
+                                 execution_quantum));
+       GEM_BUG_ON(overflows_type(engine->props.preempt_timeout_ms * 1000,
+                                 preemption_timeout));
        execution_quantum = engine->props.timeslice_duration_ms * 1000;
        preemption_timeout = engine->props.preempt_timeout_ms * 1000;
 
@@ -2463,6 +2467,10 @@ static void guc_context_policy_init_v69(struct intel_engine_cs *engine,
                desc->policy_flags |= CONTEXT_POLICY_FLAG_PREEMPT_TO_IDLE_V69;
 
        /* NB: For both of these, zero means disabled. */
+       GEM_BUG_ON(overflows_type(engine->props.timeslice_duration_ms * 1000,
+                                 desc->execution_quantum));
+       GEM_BUG_ON(overflows_type(engine->props.preempt_timeout_ms * 1000,
+                                 desc->preemption_timeout));
        desc->execution_quantum = engine->props.timeslice_duration_ms * 1000;
        desc->preemption_timeout = engine->props.preempt_timeout_ms * 1000;
 }