drm/xe: Make WA BB part of LRC BO
authorMatthew Brost <matthew.brost@intel.com>
Thu, 12 Jun 2025 03:19:25 +0000 (20:19 -0700)
committerLucas De Marchi <lucas.demarchi@intel.com>
Tue, 1 Jul 2025 20:49:38 +0000 (13:49 -0700)
No idea why, but without this GuC context switches randomly fail when
running IGTs in a loop. Need to follow up why this fixes the
aforementioned issue but can live with a stable driver for now.

Fixes: 617d824c5323 ("drm/xe: Add WA BB to capture active context utilization")
Cc: stable@vger.kernel.org
Signed-off-by: Matthew Brost <matthew.brost@intel.com>
Reviewed-by: Lucas De Marchi <lucas.demarchi@intel.com>
Tested-by: Shuicheng Lin <shuicheng.lin@intel.com>
Link: https://lore.kernel.org/r/20250612031925.4009701-1-matthew.brost@intel.com
(cherry picked from commit 3a1edef8f4b58b0ba826bc68bf4bce4bdf59ecf3)
Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
drivers/gpu/drm/xe/xe_lrc.c
drivers/gpu/drm/xe/xe_lrc_types.h

index bf7c3981897def4dfee84c8d2ad91035ec9de5f0..6e7b70532d111c87932bdb33dcc7f9621ea55baf 100644 (file)
@@ -40,6 +40,7 @@
 
 #define LRC_PPHWSP_SIZE                                SZ_4K
 #define LRC_INDIRECT_RING_STATE_SIZE           SZ_4K
+#define LRC_WA_BB_SIZE                         SZ_4K
 
 static struct xe_device *
 lrc_to_xe(struct xe_lrc *lrc)
@@ -910,7 +911,11 @@ static void xe_lrc_finish(struct xe_lrc *lrc)
 {
        xe_hw_fence_ctx_finish(&lrc->fence_ctx);
        xe_bo_unpin_map_no_vm(lrc->bo);
-       xe_bo_unpin_map_no_vm(lrc->bb_per_ctx_bo);
+}
+
+static size_t wa_bb_offset(struct xe_lrc *lrc)
+{
+       return lrc->bo->size - LRC_WA_BB_SIZE;
 }
 
 /*
@@ -943,15 +948,16 @@ static void xe_lrc_finish(struct xe_lrc *lrc)
 #define CONTEXT_ACTIVE 1ULL
 static int xe_lrc_setup_utilization(struct xe_lrc *lrc)
 {
+       const size_t max_size = LRC_WA_BB_SIZE;
        u32 *cmd, *buf = NULL;
 
-       if (lrc->bb_per_ctx_bo->vmap.is_iomem) {
-               buf = kmalloc(lrc->bb_per_ctx_bo->size, GFP_KERNEL);
+       if (lrc->bo->vmap.is_iomem) {
+               buf = kmalloc(max_size, GFP_KERNEL);
                if (!buf)
                        return -ENOMEM;
                cmd = buf;
        } else {
-               cmd = lrc->bb_per_ctx_bo->vmap.vaddr;
+               cmd = lrc->bo->vmap.vaddr + wa_bb_offset(lrc);
        }
 
        *cmd++ = MI_STORE_REGISTER_MEM | MI_SRM_USE_GGTT | MI_SRM_ADD_CS_OFFSET;
@@ -974,13 +980,14 @@ static int xe_lrc_setup_utilization(struct xe_lrc *lrc)
        *cmd++ = MI_BATCH_BUFFER_END;
 
        if (buf) {
-               xe_map_memcpy_to(gt_to_xe(lrc->gt), &lrc->bb_per_ctx_bo->vmap, 0,
-                                buf, (cmd - buf) * sizeof(*cmd));
+               xe_map_memcpy_to(gt_to_xe(lrc->gt), &lrc->bo->vmap,
+                                wa_bb_offset(lrc), buf,
+                                (cmd - buf) * sizeof(*cmd));
                kfree(buf);
        }
 
-       xe_lrc_write_ctx_reg(lrc, CTX_BB_PER_CTX_PTR,
-                            xe_bo_ggtt_addr(lrc->bb_per_ctx_bo) | 1);
+       xe_lrc_write_ctx_reg(lrc, CTX_BB_PER_CTX_PTR, xe_bo_ggtt_addr(lrc->bo) +
+                            wa_bb_offset(lrc) + 1);
 
        return 0;
 }
@@ -1018,20 +1025,13 @@ static int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe,
         * FIXME: Perma-pinning LRC as we don't yet support moving GGTT address
         * via VM bind calls.
         */
-       lrc->bo = xe_bo_create_pin_map(xe, tile, NULL, lrc_size,
+       lrc->bo = xe_bo_create_pin_map(xe, tile, NULL,
+                                      lrc_size + LRC_WA_BB_SIZE,
                                       ttm_bo_type_kernel,
                                       bo_flags);
        if (IS_ERR(lrc->bo))
                return PTR_ERR(lrc->bo);
 
-       lrc->bb_per_ctx_bo = xe_bo_create_pin_map(xe, tile, NULL, SZ_4K,
-                                                 ttm_bo_type_kernel,
-                                                 bo_flags);
-       if (IS_ERR(lrc->bb_per_ctx_bo)) {
-               err = PTR_ERR(lrc->bb_per_ctx_bo);
-               goto err_lrc_finish;
-       }
-
        lrc->size = lrc_size;
        lrc->ring.size = ring_size;
        lrc->ring.tail = 0;
@@ -1819,7 +1819,8 @@ struct xe_lrc_snapshot *xe_lrc_snapshot_capture(struct xe_lrc *lrc)
        snapshot->seqno = xe_lrc_seqno(lrc);
        snapshot->lrc_bo = xe_bo_get(lrc->bo);
        snapshot->lrc_offset = xe_lrc_pphwsp_offset(lrc);
-       snapshot->lrc_size = lrc->bo->size - snapshot->lrc_offset;
+       snapshot->lrc_size = lrc->bo->size - snapshot->lrc_offset -
+               LRC_WA_BB_SIZE;
        snapshot->lrc_snapshot = NULL;
        snapshot->ctx_timestamp = lower_32_bits(xe_lrc_ctx_timestamp(lrc));
        snapshot->ctx_job_timestamp = xe_lrc_ctx_job_timestamp(lrc);
index ae24cf6f8dd998c51be2cbb394752a757fc51b85..883e550a94234cbcc22173a63e34d60d4a69a617 100644 (file)
@@ -53,9 +53,6 @@ struct xe_lrc {
 
        /** @ctx_timestamp: readout value of CTX_TIMESTAMP on last update */
        u64 ctx_timestamp;
-
-       /** @bb_per_ctx_bo: buffer object for per context batch wa buffer */
-       struct xe_bo *bb_per_ctx_bo;
 };
 
 struct xe_lrc_snapshot;