drm/xe: Include hardware prefetch buffer in batchbuffer allocations
author Matt Roper <matthew.d.roper@intel.com>
Wed, 29 Mar 2023 17:33:32 +0000 (10:33 -0700)
committer Rodrigo Vivi <rodrigo.vivi@intel.com>
Tue, 19 Dec 2023 23:30:26 +0000 (18:30 -0500)
The hardware prefetches several cachelines of data from batchbuffers
before they are parsed.  This prefetching only stops when the parser
encounters an MI_BATCH_BUFFER_END instruction (or a nested
MI_BATCH_BUFFER_START), so we must ensure that there is enough padding
at the end of the batchbuffer to prevent the prefetcher from running
past the end of the allocation and potentially faulting.

Bspec: 45717
Link: https://lore.kernel.org/r/20230329173334.4015124-2-matthew.d.roper@intel.com
Signed-off-by: Matt Roper <matthew.d.roper@intel.com>
Reviewed-by: José Roberto de Souza <jose.souza@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
drivers/gpu/drm/xe/xe_bb.c

index 5b24018e2a80b4c5beb327d4d59265e3e79ca0e3..f326f117ba3bb73d36eeefba9c8d320148157ae4 100644 (file)
@@ -8,11 +8,26 @@
 #include "regs/xe_gpu_commands.h"
 #include "xe_device.h"
 #include "xe_engine_types.h"
+#include "xe_gt.h"
 #include "xe_hw_fence.h"
 #include "xe_sa.h"
 #include "xe_sched_job.h"
 #include "xe_vm_types.h"
 
+/* Padding, in bytes, reserved after a batch for the hardware prefetcher. */
+static int bb_prefetch(struct xe_gt *gt)
+{
+       struct xe_device *xe_dev = gt->xe;
+
+       /*
+        * Non-media GTs on graphics version 12.50+ get 1K: RCS and CCS need
+        * that much, even though other engines would be okay with 512.
+        */
+       if (GRAPHICS_VERx100(xe_dev) >= 1250 && !xe_gt_is_media_type(gt))
+               return SZ_1K;
+       return SZ_512;
+}
+
 struct xe_bb *xe_bb_new(struct xe_gt *gt, u32 dwords, bool usm)
 {
        struct xe_bb *bb = kmalloc(sizeof(*bb), GFP_KERNEL);
@@ -21,8 +36,14 @@ struct xe_bb *xe_bb_new(struct xe_gt *gt, u32 dwords, bool usm)
        if (!bb)
                return ERR_PTR(-ENOMEM);
 
-       bb->bo = xe_sa_bo_new(!usm ? &gt->kernel_bb_pool :
-                             &gt->usm.bb_pool, 4 * dwords + 4);
+       /*
+        * We need to allocate space for the requested number of dwords,
+        * one additional MI_BATCH_BUFFER_END dword, and additional buffer
+        * space to accommodate the platform-specific hardware prefetch
+        * requirements.
+        */
+       bb->bo = xe_sa_bo_new(!usm ? &gt->kernel_bb_pool : &gt->usm.bb_pool,
+                             4 * (dwords + 1) + bb_prefetch(gt));
        if (IS_ERR(bb->bo)) {
                err = PTR_ERR(bb->bo);
                goto err;