drm/i915/xehp: handle new steering options
author: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
Thu, 29 Jul 2021 16:59:51 +0000 (09:59 -0700)
committer: Matt Roper <matthew.d.roper@intel.com>
Thu, 5 Aug 2021 14:59:57 +0000 (07:59 -0700)
Xe_HP is more modular than its predecessors and as a consequence it has
more types of replicated registers.  As with l3bank regions on previous
platforms, we may need to explicitly re-steer accesses to these new
types of ranges at runtime if we can't find a single default steering
value that satisfies the fusing of all types.

v2:
 - Add a local 'i915' variable to reduce gt->i915 usage.  (Caz)
 - Drop unused 'intel_gt_read_register' prototype.  (Caz)

v3:
 - Drop unnecessary comment text.  (Lucas)
 - Drop unused register bit definition.  (Lucas)

Bspec: 66534
Cc: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
Cc: Caz Yokoyama <caz.yokoyama@intel.com>
Signed-off-by: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
Signed-off-by: Matt Roper <matthew.d.roper@intel.com>
Reviewed-by: Lucas De Marchi <lucas.demarchi@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20210729170008.2836648-2-matthew.d.roper@intel.com
drivers/gpu/drm/i915/gt/intel_gt.c
drivers/gpu/drm/i915/gt/intel_gt_types.h
drivers/gpu/drm/i915/gt/intel_region_lmem.c
drivers/gpu/drm/i915/gt/intel_sseu.c
drivers/gpu/drm/i915/gt/intel_sseu.h
drivers/gpu/drm/i915/gt/intel_workarounds.c
drivers/gpu/drm/i915/i915_drv.h
drivers/gpu/drm/i915/i915_pci.c
drivers/gpu/drm/i915/i915_reg.h
drivers/gpu/drm/i915/intel_device_info.h

index 04dd69bcf6cbe4f6916a40dd0597184e6a19462d..3bd1a31cd3565c9338ec9d001a79d628dcc7c3c8 100644 (file)
@@ -89,18 +89,40 @@ static const struct intel_mmio_range icl_l3bank_steering_table[] = {
        {},
 };
 
+static u16 slicemask(struct intel_gt *gt, int count)
+{
+       u64 dss_mask = intel_sseu_get_subslices(&gt->info.sseu, 0);
+
+       return intel_slicemask_from_dssmask(dss_mask, count);
+}
+
 int intel_gt_init_mmio(struct intel_gt *gt)
 {
+       struct drm_i915_private *i915 = gt->i915;
+
        intel_gt_init_clock_frequency(gt);
 
        intel_uc_init_mmio(&gt->uc);
        intel_sseu_info_init(gt);
 
-       if (GRAPHICS_VER(gt->i915) >= 11) {
+       /*
+        * An mslice is unavailable only if both the meml3 for the slice is
+        * disabled *and* all of the DSS in the slice (quadrant) are disabled.
+        */
+       if (HAS_MSLICES(i915))
+               gt->info.mslice_mask =
+                       slicemask(gt, GEN_DSS_PER_MSLICE) |
+                       (intel_uncore_read(gt->uncore, GEN10_MIRROR_FUSE3) &
+                        GEN12_MEML3_EN_MASK);
+
+       if (GRAPHICS_VER(i915) >= 11 &&
+                  GRAPHICS_VER_FULL(i915) < IP_VER(12, 50)) {
                gt->steering_table[L3BANK] = icl_l3bank_steering_table;
                gt->info.l3bank_mask =
                        ~intel_uncore_read(gt->uncore, GEN10_MIRROR_FUSE3) &
                        GEN10_L3BANK_MASK;
+       } else if (HAS_MSLICES(i915)) {
+               MISSING_CASE(INTEL_INFO(i915)->platform);
        }
 
        return intel_engines_init_mmio(gt);
@@ -787,6 +809,22 @@ static void intel_gt_get_valid_steering(struct intel_gt *gt,
                *sliceid = 0;           /* unused */
                *subsliceid = __ffs(gt->info.l3bank_mask);
                break;
+       case MSLICE:
+               GEM_DEBUG_WARN_ON(!gt->info.mslice_mask); /* should be impossible! */
+
+               *sliceid = __ffs(gt->info.mslice_mask);
+               *subsliceid = 0;        /* unused */
+               break;
+       case LNCF:
+               GEM_DEBUG_WARN_ON(!gt->info.mslice_mask); /* should be impossible! */
+
+               /*
+                * An LNCF is always present if its mslice is present, so we
+                * can safely just steer to LNCF 0 in all cases.
+                */
+               *sliceid = __ffs(gt->info.mslice_mask) << 1;
+               *subsliceid = 0;        /* unused */
+               break;
        default:
                MISSING_CASE(type);
                *sliceid = 0;
index 97a5075288d208e91479fe8c99d311de514388fe..a81e21bf1bd1a74b2225c2db1da7d163ac496700 100644 (file)
@@ -47,9 +47,14 @@ struct intel_mmio_range {
  * of multicast registers.  If another type of steering does not have any
  * overlap in valid steering targets with 'subslice' style registers, we will
  * need to explicitly re-steer reads of registers of the other type.
+ *
+ * Only the replication types that may need additional non-default steering
+ * are listed here.
  */
 enum intel_steering_type {
        L3BANK,
+       MSLICE,
+       LNCF,
 
        NUM_STEERING_TYPES
 };
@@ -184,6 +189,8 @@ struct intel_gt {
 
                /* Slice/subslice/EU info */
                struct sseu_dev_info sseu;
+
+               unsigned long mslice_mask;
        } info;
 };
 
index e3a2a2fa5f9488b368ee7ba5115583da57292776..a74b72f50cc98a1dc179201603a3a794983e4f4a 100644 (file)
@@ -10,6 +10,7 @@
 #include "gem/i915_gem_lmem.h"
 #include "gem/i915_gem_region.h"
 #include "gem/i915_gem_ttm.h"
+#include "gt/intel_gt.h"
 
 static int init_fake_lmem_bar(struct intel_memory_region *mem)
 {
index a648818eafa523e5bbbc92131623241833e0b571..bbd272943c3fc081efd3aeaf9fd251251ee81fdb 100644 (file)
@@ -699,3 +699,21 @@ void intel_sseu_print_topology(const struct sseu_dev_info *sseu,
                }
        }
 }
+
+u16 intel_slicemask_from_dssmask(u64 dss_mask, int dss_per_slice)
+{
+       u16 slice_mask = 0;
+       int i;
+
+       WARN_ON(sizeof(dss_mask) * 8 / dss_per_slice > 8 * sizeof(slice_mask));
+
+       for (i = 0; dss_mask; i++) {
+               if (dss_mask & GENMASK(dss_per_slice - 1, 0))
+                       slice_mask |= BIT(i);
+
+               dss_mask >>= dss_per_slice;
+       }
+
+       return slice_mask;
+}
+
index 05a93e4e66cb1f6de8a2dfe0701ee648be0a3d46..22fef98887c0334a69a1d67fc160855e71e4286b 100644 (file)
@@ -22,6 +22,10 @@ struct drm_printer;
 #define GEN_MAX_EUS            (16) /* TGL upper bound */
 #define GEN_MAX_EU_STRIDE GEN_SSEU_STRIDE(GEN_MAX_EUS)
 
+#define GEN_DSS_PER_GSLICE     4
+#define GEN_DSS_PER_CSLICE     8
+#define GEN_DSS_PER_MSLICE     8
+
 struct sseu_dev_info {
        u8 slice_mask;
        u8 subslice_mask[GEN_MAX_SLICES * GEN_MAX_SUBSLICE_STRIDE];
@@ -104,4 +108,6 @@ void intel_sseu_dump(const struct sseu_dev_info *sseu, struct drm_printer *p);
 void intel_sseu_print_topology(const struct sseu_dev_info *sseu,
                               struct drm_printer *p);
 
+u16 intel_slicemask_from_dssmask(u64 dss_mask, int dss_per_slice);
+
 #endif /* __INTEL_SSEU_H__ */
index 053fa7251cd04a28f4c62e2dfb020a595d8bd143..29a76038fb9788ca6f27248176b9d0bbfb08cb9f 100644 (file)
@@ -889,12 +889,24 @@ cfl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
                    GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
 }
 
+static void __add_mcr_wa(struct drm_i915_private *i915, struct i915_wa_list *wal,
+                        unsigned slice, unsigned subslice)
+{
+       u32 mcr, mcr_mask;
+
+       mcr = GEN11_MCR_SLICE(slice) | GEN11_MCR_SUBSLICE(subslice);
+       mcr_mask = GEN11_MCR_SLICE_MASK | GEN11_MCR_SUBSLICE_MASK;
+
+       drm_dbg(&i915->drm, "MCR slice/subslice = %x\n", mcr);
+
+       wa_write_clr_set(wal, GEN8_MCR_SELECTOR, mcr_mask, mcr);
+}
+
 static void
 icl_wa_init_mcr(struct drm_i915_private *i915, struct i915_wa_list *wal)
 {
        const struct sseu_dev_info *sseu = &i915->gt.info.sseu;
        unsigned int slice, subslice;
-       u32 mcr, mcr_mask;
 
        GEM_BUG_ON(GRAPHICS_VER(i915) < 11);
        GEM_BUG_ON(hweight8(sseu->slice_mask) > 1);
@@ -919,12 +931,79 @@ icl_wa_init_mcr(struct drm_i915_private *i915, struct i915_wa_list *wal)
        if (i915->gt.info.l3bank_mask & BIT(subslice))
                i915->gt.steering_table[L3BANK] = NULL;
 
-       mcr = GEN11_MCR_SLICE(slice) | GEN11_MCR_SUBSLICE(subslice);
-       mcr_mask = GEN11_MCR_SLICE_MASK | GEN11_MCR_SUBSLICE_MASK;
+       __add_mcr_wa(i915, wal, slice, subslice);
+}
 
-       drm_dbg(&i915->drm, "MCR slice/subslice = %x\n", mcr);
+__maybe_unused
+static void
+xehp_init_mcr(struct intel_gt *gt, struct i915_wa_list *wal)
+{
+       struct drm_i915_private *i915 = gt->i915;
+       const struct sseu_dev_info *sseu = &gt->info.sseu;
+       unsigned long slice, subslice = 0, slice_mask = 0;
+       u64 dss_mask = 0;
+       u32 lncf_mask = 0;
+       int i;
 
-       wa_write_clr_set(wal, GEN8_MCR_SELECTOR, mcr_mask, mcr);
+       /*
+        * On Xe_HP the steering increases in complexity. There are now several
+        * more units that require steering and we're not guaranteed to be able
+        * to find a common setting for all of them. These are:
+        * - GSLICE (fusable)
+        * - DSS (sub-unit within gslice; fusable)
+        * - L3 Bank (fusable)
+        * - MSLICE (fusable)
+        * - LNCF (sub-unit within mslice; always present if mslice is present)
+        * - SQIDI (always on)
+        *
+        * We'll do our default/implicit steering based on GSLICE (in the
+        * sliceid field) and DSS (in the subsliceid field).  If we can
+        * find overlap between the valid MSLICE and/or LNCF values with
+        * a suitable GSLICE, then we can just re-use the default value and
+        * skip any explicit steering at runtime.
+        *
+        * We only need to look for overlap between GSLICE/MSLICE/LNCF to find
+        * a valid sliceid value.  DSS steering is the only type of steering
+        * that utilizes the 'subsliceid' bits.
+        *
+        * Also note that, even though the steering domain is called "GSlice"
+        * and it is encoded in the register using the gslice format, the spec
+        * says that the combined (geometry | compute) fuse should be used to
+        * select the steering.
+        */
+
+       /* Find the potential gslice candidates */
+       dss_mask = intel_sseu_get_subslices(sseu, 0);
+       slice_mask = intel_slicemask_from_dssmask(dss_mask, GEN_DSS_PER_GSLICE);
+
+       /*
+        * Find the potential LNCF candidates.  Either LNCF within a valid
+        * mslice is fine.
+        */
+       for_each_set_bit(i, &gt->info.mslice_mask, GEN12_MAX_MSLICES)
+               lncf_mask |= (0x3 << (i * 2));
+
+       /*
+        * Are there any sliceid values that work for both GSLICE and LNCF
+        * steering?
+        */
+       if (slice_mask & lncf_mask) {
+               slice_mask &= lncf_mask;
+               gt->steering_table[LNCF] = NULL;
+       }
+
+       /* How about sliceid values that also work for MSLICE steering? */
+       if (slice_mask & gt->info.mslice_mask) {
+               slice_mask &= gt->info.mslice_mask;
+               gt->steering_table[MSLICE] = NULL;
+       }
+
+       slice = __ffs(slice_mask);
+       subslice = __ffs(dss_mask >> (slice * GEN_DSS_PER_GSLICE));
+       WARN_ON(subslice > GEN_DSS_PER_GSLICE);
+       WARN_ON(dss_mask >> (slice * GEN_DSS_PER_GSLICE) == 0);
+
+       __add_mcr_wa(i915, wal, slice, subslice);
 }
 
 static void
index 65000b57ddb6b517d2b019c6ee74f6ae1e2c66d1..0558921663d97d155da1d025e71b56115e52e0dd 100644 (file)
@@ -1695,6 +1695,9 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915,
 #define HAS_RUNTIME_PM(dev_priv) (INTEL_INFO(dev_priv)->has_runtime_pm)
 #define HAS_64BIT_RELOC(dev_priv) (INTEL_INFO(dev_priv)->has_64bit_reloc)
 
+#define HAS_MSLICES(dev_priv) \
+       (INTEL_INFO(dev_priv)->has_mslices)
+
 #define HAS_IPC(dev_priv)               (INTEL_INFO(dev_priv)->display.has_ipc)
 
 #define HAS_REGION(i915, i) (INTEL_INFO(i915)->memory_regions & (i))
index b76c2ba8451c0e914bcc6b11623696d41a430138..32358f90b920d3eaaa7e46ee5155f89cf494b3a0 100644 (file)
@@ -975,6 +975,7 @@ static const struct intel_device_info adl_p_info = {
        .has_llc = 1, \
        .has_logical_ring_contexts = 1, \
        .has_logical_ring_elsq = 1, \
+       .has_mslices = 1, \
        .has_rc6 = 1, \
        .has_reset_engine = 1, \
        .has_rps = 1, \
index e7d2fd55be8c8f271476d0296b967210756d3052..7696f0d1c8bbb7702429dafc3a3f02a75fc1cf16 100644 (file)
@@ -3122,6 +3122,9 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg)
 #define        GEN10_MIRROR_FUSE3              _MMIO(0x9118)
 #define GEN10_L3BANK_PAIR_COUNT     4
 #define GEN10_L3BANK_MASK   0x0F
+/* on Xe_HP the same fuses indicate mslices instead of L3 banks */
+#define GEN12_MAX_MSLICES 4
+#define GEN12_MEML3_EN_MASK 0x0F
 
 #define GEN8_EU_DISABLE0               _MMIO(0x9134)
 #define   GEN8_EU_DIS0_S0_MASK         0xffffff
index 50ac43d4047fa8ca7156350f81a739dd30a649ca..f88be11a35704ac6d1df8e5bae5c2b0e2c3d981d 100644 (file)
@@ -134,6 +134,7 @@ enum intel_ppgtt_type {
        func(has_logical_ring_contexts); \
        func(has_logical_ring_elsq); \
        func(has_master_unit_irq); \
+       func(has_mslices); \
        func(has_pooled_eu); \
        func(has_rc6); \
        func(has_rc6p); \