drm/amdgpu: fix fence calculation (v2)
author David M Nieto <david.nieto@amd.com>
Thu, 13 May 2021 17:45:39 +0000 (10:45 -0700)
committer Alex Deucher <alexander.deucher@amd.com>
Thu, 13 May 2021 18:09:12 +0000 (14:09 -0400)
The proper metric for fence utilization over several
contexts is a harmonic mean, but such a calculation is
prohibitive in kernel space, so the code approximates it.

Because the approximation diverges when one context has a
very small ratio compared with the other context, this change
filters out ratios smaller than 0.01%.

v2: make the fence calculation static and initialize variables
within that function

v3: Fix warnings (Alex)

Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: David M Nieto <david.nieto@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20210513174539.27409-2-david.nieto@amd.com
drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h

index 9036c93b4a0c67bd1eb22e2aea8b2be820b8b2c6..fc83445fbc40e336b112e26422e92a4a0e1111fb 100644 (file)
@@ -652,12 +652,14 @@ void amdgpu_ctx_mgr_fini(struct amdgpu_ctx_mgr *mgr)
        mutex_destroy(&mgr->lock);
 }
 
-void amdgpu_ctx_fence_time(struct amdgpu_ctx *ctx, struct amdgpu_ctx_entity *centity,
-               ktime_t *total, ktime_t *max)
+static void amdgpu_ctx_fence_time(struct amdgpu_ctx *ctx,
+               struct amdgpu_ctx_entity *centity, ktime_t *total, ktime_t *max)
 {
        ktime_t now, t1;
        uint32_t i;
 
+       *total = *max = 0;
+
        now = ktime_get();
        for (i = 0; i < amdgpu_sched_jobs; i++) {
                struct dma_fence *fence;
@@ -703,11 +705,22 @@ ktime_t amdgpu_ctx_mgr_fence_usage(struct amdgpu_ctx_mgr *mgr, uint32_t hwip,
        idp = &mgr->ctx_handles;
        mutex_lock(&mgr->lock);
        idr_for_each_entry(idp, ctx, id) {
+               ktime_t ttotal, tmax;
+
                if (!ctx->entities[hwip][idx])
                        continue;
 
                centity = ctx->entities[hwip][idx];
-               amdgpu_ctx_fence_time(ctx, centity, &total, &max);
+               amdgpu_ctx_fence_time(ctx, centity, &ttotal, &tmax);
+
+               /* Harmonic mean approximation diverges for very small
+                * values. If ratio < 0.01% ignore
+                */
+               if (AMDGPU_CTX_FENCE_USAGE_MIN_RATIO(tmax, ttotal))
+                       continue;
+
+               total = ktime_add(total, ttotal);
+               max = ktime_after(tmax, max) ? tmax : max;
        }
 
        mutex_unlock(&mgr->lock);
index 10dcf59a5c6b0ccf4626d2cef260d4ce8caee9a2..14db16bc3322222c0db5f95b76b163f15ad0e0a6 100644 (file)
@@ -30,6 +30,7 @@ struct drm_file;
 struct amdgpu_fpriv;
 
 #define AMDGPU_MAX_ENTITY_NUM 4
+#define AMDGPU_CTX_FENCE_USAGE_MIN_RATIO(max, total) ((max) > 16384ULL*(total))
 
 struct amdgpu_ctx_entity {
        uint64_t                sequence;
@@ -89,6 +90,4 @@ long amdgpu_ctx_mgr_entity_flush(struct amdgpu_ctx_mgr *mgr, long timeout);
 void amdgpu_ctx_mgr_fini(struct amdgpu_ctx_mgr *mgr);
 ktime_t amdgpu_ctx_mgr_fence_usage(struct amdgpu_ctx_mgr *mgr, uint32_t hwip,
                uint32_t idx, uint64_t *elapsed);
-void amdgpu_ctx_fence_time(struct amdgpu_ctx *ctx, struct amdgpu_ctx_entity *centity,
-               ktime_t *total, ktime_t *max);
 #endif