drm/xe: Convert GuC CT print to snapshot capture and print.
author Rodrigo Vivi <rodrigo.vivi@intel.com>
Tue, 16 May 2023 14:54:09 +0000 (10:54 -0400)
committer Rodrigo Vivi <rodrigo.vivi@intel.com>
Tue, 19 Dec 2023 23:33:52 +0000 (18:33 -0500)
The goal is to allow for a snapshot capture to be taken at the time
of the crash, while the print out can happen at a later time through
the exposed devcoredump virtual device.

v2: Handle memory allocation failures. (Matthew)
    Do not use GFP_ATOMIC on cases like debugfs prints. (Matthew)
v3: checkpatch fixes
v4: Do not use atomic in the g2h_worker_func (Matthew)

Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Cc: Matthew Brost <matthew.brost@intel.com>
Reviewed-by: Matthew Brost <matthew.brost@intel.com>
drivers/gpu/drm/xe/xe_guc.c
drivers/gpu/drm/xe/xe_guc_ct.c
drivers/gpu/drm/xe/xe_guc_ct.h
drivers/gpu/drm/xe/xe_guc_ct_types.h
drivers/gpu/drm/xe/xe_guc_submit.c

index eb4af4c7112467d6a81b025a20379e347dfdfcd2..b72407e24d0978bcb25b6162a90208cd16d53a46 100644 (file)
@@ -857,6 +857,6 @@ void xe_guc_print_info(struct xe_guc *guc, struct drm_printer *p)
 
        xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
 
-       xe_guc_ct_print(&guc->ct, p);
+       xe_guc_ct_print(&guc->ct, p, false);
        xe_guc_submit_print(guc, p);
 }
index e16e5fe37ed49fc934efa4822a144b8ec2af7e5d..e8c2edb1359d9b5fd58f65e4f8c22abbf049ebdf 100644 (file)
@@ -595,7 +595,7 @@ try_again:
 
 broken:
        drm_err(drm, "No forward process on H2G, reset required");
-       xe_guc_ct_print(ct, &p);
+       xe_guc_ct_print(ct, &p, true);
        ct->ctbs.h2g.info.broken = true;
 
        return -EDEADLK;
@@ -1088,38 +1088,40 @@ static void g2h_worker_func(struct work_struct *w)
                        struct drm_device *drm = &ct_to_xe(ct)->drm;
                        struct drm_printer p = drm_info_printer(drm->dev);
 
-                       xe_guc_ct_print(ct, &p);
+                       xe_guc_ct_print(ct, &p, false);
                        kick_reset(ct);
                }
        } while (ret == 1);
        xe_device_mem_access_put(ct_to_xe(ct));
 }
 
-static void guc_ct_ctb_print(struct xe_device *xe, struct guc_ctb *ctb,
-                            struct drm_printer *p)
+static void guc_ctb_snapshot_capture(struct xe_device *xe, struct guc_ctb *ctb,
+                                    struct guc_ctb_snapshot *snapshot,
+                                    bool atomic)
 {
        u32 head, tail;
 
-       drm_printf(p, "\tsize: %d\n", ctb->info.size);
-       drm_printf(p, "\tresv_space: %d\n", ctb->info.resv_space);
-       drm_printf(p, "\thead: %d\n", ctb->info.head);
-       drm_printf(p, "\ttail: %d\n", ctb->info.tail);
-       drm_printf(p, "\tspace: %d\n", ctb->info.space);
-       drm_printf(p, "\tbroken: %d\n", ctb->info.broken);
+       xe_map_memcpy_from(xe, &snapshot->desc, &ctb->desc, 0,
+                          sizeof(struct guc_ct_buffer_desc));
+       memcpy(&snapshot->info, &ctb->info, sizeof(struct guc_ctb_info));
 
-       head = desc_read(xe, ctb, head);
-       tail = desc_read(xe, ctb, tail);
-       drm_printf(p, "\thead (memory): %d\n", head);
-       drm_printf(p, "\ttail (memory): %d\n", tail);
-       drm_printf(p, "\tstatus (memory): 0x%x\n", desc_read(xe, ctb, status));
+       snapshot->cmds = kmalloc_array(ctb->info.size, sizeof(u32),
+                                      atomic ? GFP_ATOMIC : GFP_KERNEL);
+
+       if (!snapshot->cmds) {
+               drm_err(&xe->drm, "Skipping CTB commands snapshot. Only CTB info will be available.\n");
+               return;
+       }
+
+       head = snapshot->desc.head;
+       tail = snapshot->desc.tail;
 
        if (head != tail) {
                struct iosys_map map =
                        IOSYS_MAP_INIT_OFFSET(&ctb->cmds, head * sizeof(u32));
 
                while (head != tail) {
-                       drm_printf(p, "\tcmd[%d]: 0x%08x\n", head,
-                                  xe_map_rd(xe, &map, 0, u32));
+                       snapshot->cmds[head] = xe_map_rd(xe, &map, 0, u32);
                        ++head;
                        if (head == ctb->info.size) {
                                head = 0;
@@ -1131,20 +1133,140 @@ static void guc_ct_ctb_print(struct xe_device *xe, struct guc_ctb *ctb,
        }
 }
 
-void xe_guc_ct_print(struct xe_guc_ct *ct, struct drm_printer *p)
+/*
+ * Print one CTB snapshot: the cached info fields, the descriptor fields that
+ * were copied at capture time and, when a command copy is present, every DW
+ * from the descriptor head up to (not including) the descriptor tail.
+ */
+static void guc_ctb_snapshot_print(struct guc_ctb_snapshot *snapshot,
+                                  struct drm_printer *p)
+{
+       u32 head, tail;
+
+       drm_printf(p, "\tsize: %d\n", snapshot->info.size);
+       /* resv_space has its own field; it must not echo info.space. */
+       drm_printf(p, "\tresv_space: %d\n", snapshot->info.resv_space);
+       drm_printf(p, "\thead: %d\n", snapshot->info.head);
+       drm_printf(p, "\ttail: %d\n", snapshot->info.tail);
+       drm_printf(p, "\tspace: %d\n", snapshot->info.space);
+       drm_printf(p, "\tbroken: %d\n", snapshot->info.broken);
+       drm_printf(p, "\thead (memory): %d\n", snapshot->desc.head);
+       drm_printf(p, "\ttail (memory): %d\n", snapshot->desc.tail);
+       drm_printf(p, "\tstatus (memory): 0x%x\n", snapshot->desc.status);
+
+       /* No command copy was taken at capture time: only info is printed. */
+       if (!snapshot->cmds)
+               return;
+
+       head = snapshot->desc.head;
+       tail = snapshot->desc.tail;
+
+       while (head != tail) {
+               drm_printf(p, "\tcmd[%d]: 0x%08x\n", head,
+                          snapshot->cmds[head]);
+               ++head;
+               /* The buffer is circular: wrap to the first DW at the end. */
+               if (head == snapshot->info.size)
+                       head = 0;
+       }
+}
+
+static void guc_ctb_snapshot_free(struct guc_ctb_snapshot *snapshot)
 {
+       kfree(snapshot->cmds);  /* cmds may be NULL; kfree() accepts that */
+}
+
+/**
+ * xe_guc_ct_snapshot_capture - Take a quick snapshot of the CT state.
+ * @ct: GuC CT object.
+ * @atomic: Boolean to indicate if this is called from atomic context like
+ * reset or CTB handler or from some regular path like debugfs.
+ *
+ * This can be printed out in a later stage like during dev_coredump
+ * analysis.
+ *
+ * Returns: a GuC CT snapshot object that must be freed by the caller
+ * by using `xe_guc_ct_snapshot_free`, or NULL if the snapshot object
+ * itself could not be allocated.
+ */
+struct xe_guc_ct_snapshot *xe_guc_ct_snapshot_capture(struct xe_guc_ct *ct,
+                                                     bool atomic)
+{
+       struct xe_device *xe = ct_to_xe(ct);
+       struct xe_guc_ct_snapshot *snapshot;
+
+       snapshot = kzalloc(sizeof(*snapshot),
+                          atomic ? GFP_ATOMIC : GFP_KERNEL);
+
+       if (!snapshot) {
+               drm_err(&xe->drm, "Skipping CTB snapshot entirely.\n");
+               return NULL;
+       }
+
        if (ct->enabled) {
+               snapshot->ct_enabled = true;
+               /* Record the counter; otherwise the print always shows 0. */
+               snapshot->g2h_outstanding = ct->g2h_outstanding;
+               guc_ctb_snapshot_capture(xe, &ct->ctbs.h2g,
+                                        &snapshot->h2g, atomic);
+               guc_ctb_snapshot_capture(xe, &ct->ctbs.g2h,
+                                        &snapshot->g2h, atomic);
+       }
+
+       return snapshot;
+}
+
+/**
+ * xe_guc_ct_snapshot_print - Print out a given GuC CT snapshot.
+ * @snapshot: GuC CT snapshot object.
+ * @p: drm_printer where it will be printed out.
+ *
+ * Prints the given GuC CT snapshot object; a NULL @snapshot is ignored.
+ */
+void xe_guc_ct_snapshot_print(struct xe_guc_ct_snapshot *snapshot,
+                             struct drm_printer *p)
+{
+       if (!snapshot)
+               return;
+
+       if (snapshot->ct_enabled) {
                drm_puts(p, "\nH2G CTB (all sizes in DW):\n");
-               guc_ct_ctb_print(ct_to_xe(ct), &ct->ctbs.h2g, p);
+               guc_ctb_snapshot_print(&snapshot->h2g, p);
 
                drm_puts(p, "\nG2H CTB (all sizes in DW):\n");
-               guc_ct_ctb_print(ct_to_xe(ct), &ct->ctbs.g2h, p);
-               drm_printf(p, "\tg2h outstanding: %d\n", ct->g2h_outstanding);
+               guc_ctb_snapshot_print(&snapshot->g2h, p);
+
+               drm_printf(p, "\tg2h outstanding: %d\n",
+                          snapshot->g2h_outstanding);
        } else {
                drm_puts(p, "\nCT disabled\n");
        }
 }
 
+/**
+ * xe_guc_ct_snapshot_free - Free all allocated objects for a given snapshot.
+ * @snapshot: GuC CT snapshot object.
+ *
+ * This function frees all the memory that was allocated at capture
+ * time. A NULL @snapshot is ignored.
+ */
+void xe_guc_ct_snapshot_free(struct xe_guc_ct_snapshot *snapshot)
+{
+       if (!snapshot)
+               return;
+
+       guc_ctb_snapshot_free(&snapshot->h2g);
+       guc_ctb_snapshot_free(&snapshot->g2h);
+       kfree(snapshot);
+}
+
+/**
+ * xe_guc_ct_print - GuC CT Print.
+ * @ct: GuC CT.
+ * @p: drm_printer where it will be printed out.
+ * @atomic: Boolean to indicate if this is called from atomic context like
+ * reset or CTB handler or from some regular path like debugfs.
+ *
+ * This function quickly captures a snapshot and immediately prints it out.
+ */
+void xe_guc_ct_print(struct xe_guc_ct *ct, struct drm_printer *p, bool atomic)
+{
+       struct xe_guc_ct_snapshot *snapshot;
+
+       snapshot = xe_guc_ct_snapshot_capture(ct, atomic);
+       xe_guc_ct_snapshot_print(snapshot, p);
+       xe_guc_ct_snapshot_free(snapshot);
+}
+
 #ifdef XE_GUC_CT_SELFTEST
 /*
  * Disable G2H processing in IRQ handler to force xe_guc_ct_send to enter flow
@@ -1166,7 +1288,7 @@ void xe_guc_ct_selftest(struct xe_guc_ct *ct, struct drm_printer *p)
                ret = xe_guc_ct_send(ct, action, ARRAY_SIZE(action), 4, 1);
                if (ret) {
                        drm_printf(p, "Aborted pass %d, ret %d\n", i, ret);
-                       xe_guc_ct_print(ct, p);
+                       xe_guc_ct_print(ct, p, true);
                        break;
                }
        }
index 49fb74f91e4df74acb79b77b3d4aa9e6783e3128..3e04ee64652c1ec4cac422070da1cb5085f17981 100644 (file)
@@ -13,9 +13,15 @@ struct drm_printer;
 int xe_guc_ct_init(struct xe_guc_ct *ct);
 int xe_guc_ct_enable(struct xe_guc_ct *ct);
 void xe_guc_ct_disable(struct xe_guc_ct *ct);
-void xe_guc_ct_print(struct xe_guc_ct *ct, struct drm_printer *p);
 void xe_guc_ct_fast_path(struct xe_guc_ct *ct);
 
+struct xe_guc_ct_snapshot *
+xe_guc_ct_snapshot_capture(struct xe_guc_ct *ct, bool atomic);
+void xe_guc_ct_snapshot_print(struct xe_guc_ct_snapshot *snapshot,
+                             struct drm_printer *p);
+void xe_guc_ct_snapshot_free(struct xe_guc_ct_snapshot *snapshot);
+void xe_guc_ct_print(struct xe_guc_ct *ct, struct drm_printer *p, bool atomic);
+
 static inline void xe_guc_ct_irq_handler(struct xe_guc_ct *ct)
 {
        wake_up_all(&ct->wq);
index 64e3dd14d4b2b71cd41059e2af69deef5f714926..93046d95b009eb9045dd77905136593ae1db60f0 100644 (file)
@@ -48,6 +48,32 @@ struct guc_ctb {
        struct guc_ctb_info info;
 };
 
+/**
+ * struct guc_ctb_snapshot - GuC command transport buffer (CTB) snapshot
+ */
+struct guc_ctb_snapshot {
+       /** @desc: snapshot of the CTB descriptor */
+       struct guc_ct_buffer_desc desc;
+       /** @cmds: snapshot of the CTB commands, NULL if not captured */
+       u32 *cmds;
+       /** @info: snapshot of the CTB info */
+       struct guc_ctb_info info;
+};
+
+/**
+ * struct xe_guc_ct_snapshot - GuC command transport (CT) snapshot
+ */
+struct xe_guc_ct_snapshot {
+       /** @ct_enabled: whether CT was enabled at capture time */
+       bool ct_enabled;
+       /** @g2h_outstanding: G2H outstanding info at capture time */
+       u32 g2h_outstanding;
+       /** @g2h: G2H CTB snapshot */
+       struct guc_ctb_snapshot g2h;
+       /** @h2g: H2G CTB snapshot */
+       struct guc_ctb_snapshot h2g;
+};
+
 /**
  * struct xe_guc_ct - GuC command transport (CT) layer
  *
index 55b51ff791b8562e676d7eb99bbec198323cef92..f587aa48c5bd1b3562263563da997ab7fbc7c045 100644 (file)
@@ -764,7 +764,7 @@ static void simple_error_capture(struct xe_engine *e)
                }
 
                xe_force_wake_get(gt_to_fw(guc_to_gt(guc)), XE_FORCEWAKE_ALL);
-               xe_guc_ct_print(&guc->ct, &p);
+               xe_guc_ct_print(&guc->ct, &p, true);
                guc_engine_print(e, &p);
                for_each_hw_engine(hwe, guc_to_gt(guc), id) {
                        if (hwe->class != e->hwe->class ||