drm/etnaviv: print offender task information on hangcheck recovery
author: Christian Gmeiner <christian.gmeiner@gmail.com>
Fri, 3 Jun 2022 12:37:05 +0000 (14:37 +0200)
committer: Lucas Stach <l.stach@pengutronix.de>
Fri, 19 Aug 2022 16:31:43 +0000 (18:31 +0200)
Track the PID of each submit, so we can print the name and cmdline of
the task that submitted the batch which caused the GPU to hang.

Signed-off-by: Christian Gmeiner <christian.gmeiner@gmail.com>
Signed-off-by: Lucas Stach <l.stach@pengutronix.de>
drivers/gpu/drm/etnaviv/etnaviv_gem.h
drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c
drivers/gpu/drm/etnaviv/etnaviv_gpu.c
drivers/gpu/drm/etnaviv/etnaviv_gpu.h
drivers/gpu/drm/etnaviv/etnaviv_sched.c

index 63688e6e45804f29737fefa8aa4854034901aac5..baa81cbf701a6a225daf7383ee5d0e9c16df82d5 100644 (file)
@@ -96,6 +96,7 @@ struct etnaviv_gem_submit {
        int out_fence_id;
        struct list_head node; /* GPU active submit list */
        struct etnaviv_cmdbuf cmdbuf;
+       struct pid *pid;       /* submitting process */
        bool runtime_resumed;
        u32 exec_state;
        u32 flags;
index 1ac916b248917c2c88e3ac6ac874c79c40c92302..1491159d0d2095fb0715fcaa18ea67d120b0c0b5 100644 (file)
@@ -399,6 +399,9 @@ static void submit_cleanup(struct kref *kref)
                mutex_unlock(&submit->gpu->fence_lock);
                dma_fence_put(submit->out_fence);
        }
+
+       put_pid(submit->pid);
+
        kfree(submit->pmrs);
        kfree(submit);
 }
@@ -422,6 +425,7 @@ int etnaviv_ioctl_gem_submit(struct drm_device *dev, void *data,
        struct sync_file *sync_file = NULL;
        struct ww_acquire_ctx ticket;
        int out_fence_fd = -1;
+       struct pid *pid = get_pid(task_pid(current));
        void *stream;
        int ret;
 
@@ -519,6 +523,8 @@ int etnaviv_ioctl_gem_submit(struct drm_device *dev, void *data,
                goto err_submit_ww_acquire;
        }
 
+       submit->pid = pid;
+
        ret = etnaviv_cmdbuf_init(priv->cmdbuf_suballoc, &submit->cmdbuf,
                                  ALIGN(args->stream_size, 8) + 8);
        if (ret)
index 37018bc55810d1385c455ba0b274f79162b33faa..7d9bf4673e2d73203c2cdf9d980405da3a86070e 100644 (file)
@@ -1045,12 +1045,28 @@ pm_put:
 }
 #endif
 
-void etnaviv_gpu_recover_hang(struct etnaviv_gpu *gpu)
+void etnaviv_gpu_recover_hang(struct etnaviv_gem_submit *submit)
 {
+       struct etnaviv_gpu *gpu = submit->gpu;
+       char *comm = NULL, *cmd = NULL;
+       struct task_struct *task;
        unsigned int i;
 
        dev_err(gpu->dev, "recover hung GPU!\n");
 
+       task = get_pid_task(submit->pid, PIDTYPE_PID);
+       if (task) {
+               comm = kstrdup(task->comm, GFP_KERNEL);
+               cmd = kstrdup_quotable_cmdline(task, GFP_KERNEL);
+               put_task_struct(task);
+       }
+
+       if (comm && cmd)
+               dev_err(gpu->dev, "offending task: %s (%s)\n", comm, cmd);
+
+       kfree(cmd);
+       kfree(comm);
+
        if (pm_runtime_get_sync(gpu->dev) < 0)
                goto pm_put;
 
index 85eddd492774d59c34189f6768e24e3f42dd8766..b3a0941d56fd37efef360f6fc8919d6d3a85cf4e 100644 (file)
@@ -168,7 +168,7 @@ bool etnaviv_fill_identity_from_hwdb(struct etnaviv_gpu *gpu);
 int etnaviv_gpu_debugfs(struct etnaviv_gpu *gpu, struct seq_file *m);
 #endif
 
-void etnaviv_gpu_recover_hang(struct etnaviv_gpu *gpu);
+void etnaviv_gpu_recover_hang(struct etnaviv_gem_submit *submit);
 void etnaviv_gpu_retire(struct etnaviv_gpu *gpu);
 int etnaviv_gpu_wait_fence_interruptible(struct etnaviv_gpu *gpu,
        u32 fence, struct drm_etnaviv_timespec *timeout);
index 72e2553fbc984983df9767a7ed7ae9536477227c..d29f467eee1380b586c7d23fd01b994bbd713cdb 100644 (file)
@@ -67,7 +67,7 @@ static enum drm_gpu_sched_stat etnaviv_sched_timedout_job(struct drm_sched_job
 
        /* get the GPU back into the init state */
        etnaviv_core_dump(submit);
-       etnaviv_gpu_recover_hang(gpu);
+       etnaviv_gpu_recover_hang(submit);
 
        drm_sched_resubmit_jobs(&gpu->sched);