Merge tag 'drm-intel-gt-next-2022-11-03' of git://anongit.freedesktop.org/drm/drm...
author     Dave Airlie <airlied@redhat.com>
           Fri, 4 Nov 2022 07:20:12 +0000 (17:20 +1000)
committer  Dave Airlie <airlied@redhat.com>
           Fri, 4 Nov 2022 07:33:34 +0000 (17:33 +1000)
Driver Changes:

- Fix for #7306: [Arc A380] white flickering when using arc as a
  secondary gpu (Matt A)
- Add Wa_18017747507 for DG2 (Wayne)
- Avoid spurious WARN on DG1 due to incorrect cache_dirty flag
  (Niranjana, Matt A)
- Corrections to CS timestamp support for Gen5 and earlier (Ville)

- Fix a build error seen with the clang compiler in the hwmon code (GG)
- Improvements to LMEM handling with RPM (Anshuman, Matt A)
- Cleanups in dmabuf code (Mike)

- Selftest improvements (Matt A)

Signed-off-by: Dave Airlie <airlied@redhat.com>
From: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/Y2N11wu175p6qeEN@jlahtine-mobl.ger.corp.intel.com
150 files changed:
Documentation/ABI/testing/sysfs-driver-intel-i915-hwmon [new file with mode: 0644]
MAINTAINERS
drivers/gpu/drm/i915/Kconfig.profile
drivers/gpu/drm/i915/Makefile
drivers/gpu/drm/i915/display/intel_dpt.c
drivers/gpu/drm/i915/display/intel_fb_pin.c
drivers/gpu/drm/i915/display/intel_lpe_audio.c
drivers/gpu/drm/i915/gem/i915_gem_context.c
drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c
drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
drivers/gpu/drm/i915/gem/i915_gem_internal.c
drivers/gpu/drm/i915/gem/i915_gem_mman.c
drivers/gpu/drm/i915/gem/i915_gem_object.c
drivers/gpu/drm/i915/gem/i915_gem_object.h
drivers/gpu/drm/i915/gem/i915_gem_pages.c
drivers/gpu/drm/i915/gem/i915_gem_pm.c
drivers/gpu/drm/i915/gem/i915_gem_shmem.c
drivers/gpu/drm/i915/gem/i915_gem_stolen.c
drivers/gpu/drm/i915/gem/i915_gem_ttm.c
drivers/gpu/drm/i915/gem/i915_gem_ttm.h
drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c
drivers/gpu/drm/i915/gem/i915_gem_userptr.c
drivers/gpu/drm/i915/gem/selftests/huge_pages.c
drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c
drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
drivers/gpu/drm/i915/gt/gen8_engine_cs.c
drivers/gpu/drm/i915/gt/gen8_engine_cs.h
drivers/gpu/drm/i915/gt/gen8_ppgtt.c
drivers/gpu/drm/i915/gt/intel_context.h
drivers/gpu/drm/i915/gt/intel_context_types.h
drivers/gpu/drm/i915/gt/intel_engine.h
drivers/gpu/drm/i915/gt/intel_engine_cs.c
drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c
drivers/gpu/drm/i915/gt/intel_engine_regs.h
drivers/gpu/drm/i915/gt/intel_execlists_submission.c
drivers/gpu/drm/i915/gt/intel_ggtt.c
drivers/gpu/drm/i915/gt/intel_gpu_commands.h
drivers/gpu/drm/i915/gt/intel_gsc.c
drivers/gpu/drm/i915/gt/intel_gt.c
drivers/gpu/drm/i915/gt/intel_gt.h
drivers/gpu/drm/i915/gt/intel_gt_clock_utils.c
drivers/gpu/drm/i915/gt/intel_gt_mcr.c
drivers/gpu/drm/i915/gt/intel_gt_mcr.h
drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c
drivers/gpu/drm/i915/gt/intel_gt_regs.h
drivers/gpu/drm/i915/gt/intel_gt_sysfs.c
drivers/gpu/drm/i915/gt/intel_gt_sysfs.h
drivers/gpu/drm/i915/gt/intel_gt_sysfs_pm.c
drivers/gpu/drm/i915/gt/intel_gt_types.h
drivers/gpu/drm/i915/gt/intel_gtt.c
drivers/gpu/drm/i915/gt/intel_gtt.h
drivers/gpu/drm/i915/gt/intel_lrc.c
drivers/gpu/drm/i915/gt/intel_lrc.h
drivers/gpu/drm/i915/gt/intel_migrate.c
drivers/gpu/drm/i915/gt/intel_mocs.c
drivers/gpu/drm/i915/gt/intel_reset.c
drivers/gpu/drm/i915/gt/intel_rps.c
drivers/gpu/drm/i915/gt/intel_rps.h
drivers/gpu/drm/i915/gt/intel_sseu.c
drivers/gpu/drm/i915/gt/intel_workarounds.c
drivers/gpu/drm/i915/gt/intel_workarounds_types.h
drivers/gpu/drm/i915/gt/selftest_engine_cs.c
drivers/gpu/drm/i915/gt/selftest_execlists.c
drivers/gpu/drm/i915/gt/selftest_gt_pm.c
drivers/gpu/drm/i915/gt/selftest_hangcheck.c
drivers/gpu/drm/i915/gt/selftest_migrate.c
drivers/gpu/drm/i915/gt/selftest_rps.c
drivers/gpu/drm/i915/gt/selftest_slpc.c
drivers/gpu/drm/i915/gt/selftest_workarounds.c
drivers/gpu/drm/i915/gt/sysfs_engines.c
drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h
drivers/gpu/drm/i915/gt/uc/abi/guc_actions_slpc_abi.h
drivers/gpu/drm/i915/gt/uc/abi/guc_klvs_abi.h
drivers/gpu/drm/i915/gt/uc/intel_guc.c
drivers/gpu/drm/i915/gt/uc/intel_guc.h
drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c
drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c
drivers/gpu/drm/i915/gt/uc/intel_guc_debugfs.c
drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c
drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h
drivers/gpu/drm/i915/gt/uc/intel_guc_log.c
drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h
drivers/gpu/drm/i915/gt/uc/intel_guc_slpc_types.h
drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
drivers/gpu/drm/i915/gt/uc/intel_huc.c
drivers/gpu/drm/i915/gt/uc/intel_huc.h
drivers/gpu/drm/i915/gt/uc/intel_huc_fw.c
drivers/gpu/drm/i915/gt/uc/intel_huc_fw.h
drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c
drivers/gpu/drm/i915/gvt/cfg_space.c
drivers/gpu/drm/i915/gvt/handlers.c
drivers/gpu/drm/i915/gvt/mmio_context.c
drivers/gpu/drm/i915/i915_driver.c
drivers/gpu/drm/i915/i915_drv.h
drivers/gpu/drm/i915/i915_gem.c
drivers/gpu/drm/i915/i915_getparam.c
drivers/gpu/drm/i915/i915_gpu_error.c
drivers/gpu/drm/i915/i915_hwmon.c [new file with mode: 0644]
drivers/gpu/drm/i915/i915_hwmon.h [new file with mode: 0644]
drivers/gpu/drm/i915/i915_pci.c
drivers/gpu/drm/i915/i915_perf.c
drivers/gpu/drm/i915/i915_perf.h
drivers/gpu/drm/i915/i915_perf_oa_regs.h
drivers/gpu/drm/i915/i915_perf_types.h
drivers/gpu/drm/i915/i915_reg.h
drivers/gpu/drm/i915/i915_reg_defs.h
drivers/gpu/drm/i915/i915_request.c
drivers/gpu/drm/i915/i915_request.h
drivers/gpu/drm/i915/i915_scatterlist.h
drivers/gpu/drm/i915/i915_selftest.h
drivers/gpu/drm/i915/i915_trace.h
drivers/gpu/drm/i915/i915_vma.c
drivers/gpu/drm/i915/intel_device_info.h
drivers/gpu/drm/i915/intel_gvt_mmio_table.c
drivers/gpu/drm/i915/intel_mchbar_regs.h
drivers/gpu/drm/i915/intel_pci_config.h
drivers/gpu/drm/i915/intel_pm.c
drivers/gpu/drm/i915/intel_runtime_pm.c
drivers/gpu/drm/i915/intel_runtime_pm.h
drivers/gpu/drm/i915/intel_uncore.c
drivers/gpu/drm/i915/intel_uncore.h
drivers/gpu/drm/i915/pxp/intel_pxp.c
drivers/gpu/drm/i915/pxp/intel_pxp.h
drivers/gpu/drm/i915/pxp/intel_pxp_huc.c [new file with mode: 0644]
drivers/gpu/drm/i915/pxp/intel_pxp_huc.h [new file with mode: 0644]
drivers/gpu/drm/i915/pxp/intel_pxp_irq.h
drivers/gpu/drm/i915/pxp/intel_pxp_session.c
drivers/gpu/drm/i915/pxp/intel_pxp_session.h
drivers/gpu/drm/i915/pxp/intel_pxp_tee.c
drivers/gpu/drm/i915/pxp/intel_pxp_tee.h
drivers/gpu/drm/i915/pxp/intel_pxp_tee_interface.h
drivers/gpu/drm/i915/pxp/intel_pxp_types.h
drivers/gpu/drm/i915/selftests/i915_gem_gtt.c
drivers/gpu/drm/i915/selftests/i915_perf.c
drivers/gpu/drm/i915/selftests/i915_request.c
drivers/gpu/drm/i915/selftests/intel_uncore.c
drivers/gpu/drm/i915/selftests/mock_gem_device.c
drivers/misc/mei/bus.c
drivers/misc/mei/client.c
drivers/misc/mei/hbm.c
drivers/misc/mei/hw-me.c
drivers/misc/mei/hw.h
drivers/misc/mei/interrupt.c
drivers/misc/mei/mei_dev.h
drivers/misc/mei/pxp/mei_pxp.c
include/drm/i915_pxp_tee_interface.h
include/linux/mei_cl_bus.h
include/uapi/drm/i915_drm.h

diff --git a/Documentation/ABI/testing/sysfs-driver-intel-i915-hwmon b/Documentation/ABI/testing/sysfs-driver-intel-i915-hwmon
new file mode 100644 (file)
index 0000000..2d6a472
--- /dev/null
@@ -0,0 +1,75 @@
+What:          /sys/devices/.../hwmon/hwmon<i>/in0_input
+Date:          February 2023
+KernelVersion: 6.2
+Contact:       intel-gfx@lists.freedesktop.org
+Description:   RO. Current voltage in millivolts.
+
+               Only supported for particular Intel i915 graphics platforms.
+
+What:          /sys/devices/.../hwmon/hwmon<i>/power1_max
+Date:          February 2023
+KernelVersion: 6.2
+Contact:       intel-gfx@lists.freedesktop.org
+Description:   RW. Card reactive sustained (PL1/Tau) power limit in microwatts.
+
+               The power controller will throttle the operating frequency
+               if the power averaged over a window (typically seconds)
+               exceeds this limit.
+
+               Only supported for particular Intel i915 graphics platforms.
+
+What:          /sys/devices/.../hwmon/hwmon<i>/power1_rated_max
+Date:          February 2023
+KernelVersion: 6.2
+Contact:       intel-gfx@lists.freedesktop.org
+Description:   RO. Card default power limit (default TDP setting).
+
+               Only supported for particular Intel i915 graphics platforms.
+
+What:          /sys/devices/.../hwmon/hwmon<i>/power1_max_interval
+Date:          February 2023
+KernelVersion: 6.2
+Contact:       intel-gfx@lists.freedesktop.org
+Description:   RW. Sustained power limit interval (Tau in PL1/Tau) in
+               milliseconds over which sustained power is averaged.
+
+               Only supported for particular Intel i915 graphics platforms.
+
+What:          /sys/devices/.../hwmon/hwmon<i>/power1_crit
+Date:          February 2023
+KernelVersion: 6.2
+Contact:       intel-gfx@lists.freedesktop.org
+Description:   RW. Card reactive critical (I1) power limit in microwatts.
+
+               Card reactive critical (I1) power limit in microwatts is exposed
+               for client products. The power controller will throttle the
+               operating frequency if the power averaged over a window exceeds
+               this limit.
+
+               Only supported for particular Intel i915 graphics platforms.
+
+What:          /sys/devices/.../hwmon/hwmon<i>/curr1_crit
+Date:          February 2023
+KernelVersion: 6.2
+Contact:       intel-gfx@lists.freedesktop.org
+Description:   RW. Card reactive critical (I1) power limit in milliamperes.
+
+               Card reactive critical (I1) power limit in milliamperes is
+               exposed for server products. The power controller will throttle
+               the operating frequency if the power averaged over a window
+               exceeds this limit.
+
+               Only supported for particular Intel i915 graphics platforms.
+
+What:          /sys/devices/.../hwmon/hwmon<i>/energy1_input
+Date:          February 2023
+KernelVersion: 6.2
+Contact:       intel-gfx@lists.freedesktop.org
+Description:   RO. Energy input of device or gt in microjoules.
+
+               For i915 device level hwmon devices (name "i915") this
+               reflects energy input for the entire device. For gt level
+               hwmon devices (name "i915_gtN") this reflects energy input
+               for the gt.
+
+               Only supported for particular Intel i915 graphics platforms.
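[Editorial aside, not part of the patch: the entries above are plain sysfs attributes, so they can be consumed by any hwmon-aware tool or a few lines of C. The sketch below is illustrative only; the hwmon index in the path is an assumption and real code should discover it at runtime by matching the hwmon "name" attribute against "i915" (or "i915_gtN" for per-gt devices).]

/*
 * Illustrative sketch (not from this patch): read the sustained power
 * limit (power1_max, in microwatts) exposed by the new i915 hwmon
 * interface. "hwmon2" is a placeholder index.
 */
#include <stdio.h>

int main(void)
{
	const char *path = "/sys/class/hwmon/hwmon2/power1_max"; /* assumed index */
	unsigned long long uw;
	FILE *f = fopen(path, "r");

	if (!f) {
		perror(path);
		return 1;
	}
	if (fscanf(f, "%llu", &uw) != 1) {
		fclose(f);
		fprintf(stderr, "could not parse %s\n", path);
		return 1;
	}
	fclose(f);

	printf("PL1 sustained power limit: %llu.%06llu W\n",
	       uw / 1000000ULL, uw % 1000000ULL);
	return 0;
}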
index ab07cf28844e3f483ee691e59d7fe318e1545751..30e3df70daec93e7adeca7186c978075a223f13f 100644 (file)
@@ -10224,6 +10224,7 @@ Q:      http://patchwork.freedesktop.org/project/intel-gfx/
 B:     https://gitlab.freedesktop.org/drm/intel/-/wikis/How-to-file-i915-bugs
 C:     irc://irc.oftc.net/intel-gfx
 T:     git git://anongit.freedesktop.org/drm-intel
+F:     Documentation/ABI/testing/sysfs-driver-intel-i915-hwmon
 F:     Documentation/gpu/i915.rst
 F:     drivers/gpu/drm/i915/
 F:     include/drm/i915*
index 39328567c20072f284d3d7eea8a8d650fe2bde74..7cc38d25ee5c8b0d2631c7132d2a52f9be66b1b6 100644 (file)
@@ -57,10 +57,28 @@ config DRM_I915_PREEMPT_TIMEOUT
        default 640 # milliseconds
        help
          How long to wait (in milliseconds) for a preemption event to occur
-         when submitting a new context via execlists. If the current context
-         does not hit an arbitration point and yield to HW before the timer
-         expires, the HW will be reset to allow the more important context
-         to execute.
+         when submitting a new context. If the current context does not hit
+         an arbitration point and yield to HW before the timer expires, the
+         HW will be reset to allow the more important context to execute.
+
+         This is adjustable via
+         /sys/class/drm/card?/engine/*/preempt_timeout_ms
+
+         May be 0 to disable the timeout.
+
+         The compiled in default may get overridden at driver probe time on
+         certain platforms and certain engines which will be reflected in the
+         sysfs control.
+
+config DRM_I915_PREEMPT_TIMEOUT_COMPUTE
+       int "Preempt timeout for compute engines (ms, jiffy granularity)"
+       default 7500 # milliseconds
+       help
+         How long to wait (in milliseconds) for a preemption event to occur
+         when submitting a new context to a compute capable engine. If the
+         current context does not hit an arbitration point and yield to HW
+         before the timer expires, the HW will be reset to allow the more
+         important context to execute.
 
          This is adjustable via
          /sys/class/drm/card?/engine/*/preempt_timeout_ms
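[Editorial aside, not part of the patch: as the help text notes, the compiled-in default is only a starting point; the per-engine sysfs control can override it at runtime. A minimal sketch follows, assuming "card0" and an "rcs0" engine directory and sufficient privileges.]

/*
 * Illustrative sketch (not from this patch): adjust one engine's
 * preemption timeout via the sysfs control named in the help text.
 * "card0"/"rcs0" are assumed names; writing 0 disables the timeout.
 */
#include <stdio.h>

int main(void)
{
	const char *path =
		"/sys/class/drm/card0/engine/rcs0/preempt_timeout_ms";
	FILE *f = fopen(path, "w");

	if (!f) {
		perror(path);
		return 1;
	}
	fprintf(f, "%u\n", 640U);	/* e.g. restore the compiled-in default */
	fclose(f);
	return 0;
}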
index cea00aaca04b62ea75ce1636e49c9e68a08ae515..51704b54317cfa6aa70e69383602abddac35ef6a 100644 (file)
@@ -209,6 +209,9 @@ i915-y += gt/uc/intel_uc.o \
 # graphics system controller (GSC) support
 i915-y += gt/intel_gsc.o
 
+# graphics hardware monitoring (HWMON) support
+i915-$(CONFIG_HWMON) += i915_hwmon.o
+
 # modesetting core code
 i915-y += \
        display/hsw_ips.o \
@@ -310,15 +313,18 @@ i915-y += \
 
 i915-y += i915_perf.o
 
-# Protected execution platform (PXP) support
-i915-$(CONFIG_DRM_I915_PXP) += \
+# Protected execution platform (PXP) support. Base support is required for HuC
+i915-y += \
        pxp/intel_pxp.o \
+       pxp/intel_pxp_tee.o \
+       pxp/intel_pxp_huc.o
+
+i915-$(CONFIG_DRM_I915_PXP) += \
        pxp/intel_pxp_cmd.o \
        pxp/intel_pxp_debugfs.o \
        pxp/intel_pxp_irq.o \
        pxp/intel_pxp_pm.o \
-       pxp/intel_pxp_session.o \
-       pxp/intel_pxp_tee.o
+       pxp/intel_pxp_session.o
 
 # Post-mortem debug and GPU hang state capture
 i915-$(CONFIG_DRM_I915_CAPTURE_ERROR) += i915_gpu_error.o
index ac587647e1f5015d393270a8aeeb7b360ef236b3..ad1a37b515fb1c8991d1832bd99a2d301bc15c20 100644 (file)
@@ -5,6 +5,7 @@
 
 #include "gem/i915_gem_domain.h"
 #include "gem/i915_gem_internal.h"
+#include "gem/i915_gem_lmem.h"
 #include "gt/gen8_ppgtt.h"
 
 #include "i915_drv.h"
index 1dddd6abd77b5c4dae80141e3b231dbd9647a495..6900acbb1381cb9b433d3321ba092cc32a67d26d 100644 (file)
@@ -167,7 +167,6 @@ retry:
                ret = i915_gem_object_attach_phys(obj, alignment);
        else if (!ret && HAS_LMEM(dev_priv))
                ret = i915_gem_object_migrate(obj, &ww, INTEL_REGION_LMEM_0);
-       /* TODO: Do we need to sync when migration becomes async? */
        if (!ret)
                ret = i915_gem_object_pin_pages(obj);
        if (ret)
index 6a7ac60e4f766713f301dc96b21a201b2287edf6..22ca8754ea9645b32c80c761b19303df12fe39ab 100644 (file)
@@ -100,9 +100,9 @@ lpe_audio_platdev_create(struct drm_i915_private *dev_priv)
        rsc[0].flags    = IORESOURCE_IRQ;
        rsc[0].name     = "hdmi-lpe-audio-irq";
 
-       rsc[1].start    = pci_resource_start(pdev, GTTMMADR_BAR) +
+       rsc[1].start    = pci_resource_start(pdev, GEN4_GTTMMADR_BAR) +
                I915_HDMI_LPE_AUDIO_BASE;
-       rsc[1].end      = pci_resource_start(pdev, GTTMMADR_BAR) +
+       rsc[1].end      = pci_resource_start(pdev, GEN4_GTTMMADR_BAR) +
                I915_HDMI_LPE_AUDIO_BASE + I915_HDMI_LPE_AUDIO_SIZE - 1;
        rsc[1].flags    = IORESOURCE_MEM;
        rsc[1].name     = "hdmi-lpe-audio-mmio";
index 1e29b1e6d186875b2c132880f2a806e1484dc425..01402f3c58f65c8d84095b2704dff5d30275f55a 100644 (file)
@@ -1452,7 +1452,7 @@ static void engines_idle_release(struct i915_gem_context *ctx,
                int err;
 
                /* serialises with execbuf */
-               set_bit(CONTEXT_CLOSED_BIT, &ce->flags);
+               intel_context_close(ce);
                if (!intel_context_pin_if_active(ce))
                        continue;
 
@@ -2298,7 +2298,6 @@ int i915_gem_context_create_ioctl(struct drm_device *dev, void *data,
        }
 
        args->ctx_id = id;
-       drm_dbg(&i915->drm, "HW context %d created\n", args->ctx_id);
 
        return 0;
 
index 07eee1c09aaf66936d4662fdebb105bcb90c6450..ec6f7ae47783aceaf7450e6c6dc7b19a4519435b 100644 (file)
@@ -25,43 +25,44 @@ static struct drm_i915_gem_object *dma_buf_to_obj(struct dma_buf *buf)
        return to_intel_bo(buf->priv);
 }
 
-static struct sg_table *i915_gem_map_dma_buf(struct dma_buf_attachment *attachment,
+static struct sg_table *i915_gem_map_dma_buf(struct dma_buf_attachment *attach,
                                             enum dma_data_direction dir)
 {
-       struct drm_i915_gem_object *obj = dma_buf_to_obj(attachment->dmabuf);
-       struct sg_table *st;
+       struct drm_i915_gem_object *obj = dma_buf_to_obj(attach->dmabuf);
+       struct sg_table *sgt;
        struct scatterlist *src, *dst;
        int ret, i;
 
-       /* Copy sg so that we make an independent mapping */
-       st = kmalloc(sizeof(struct sg_table), GFP_KERNEL);
-       if (st == NULL) {
+       /*
+        * Make a copy of the object's sgt, so that we can make an independent
+        * mapping
+        */
+       sgt = kmalloc(sizeof(*sgt), GFP_KERNEL);
+       if (!sgt) {
                ret = -ENOMEM;
                goto err;
        }
 
-       ret = sg_alloc_table(st, obj->mm.pages->nents, GFP_KERNEL);
+       ret = sg_alloc_table(sgt, obj->mm.pages->orig_nents, GFP_KERNEL);
        if (ret)
                goto err_free;
 
-       src = obj->mm.pages->sgl;
-       dst = st->sgl;
-       for (i = 0; i < obj->mm.pages->nents; i++) {
+       dst = sgt->sgl;
+       for_each_sg(obj->mm.pages->sgl, src, obj->mm.pages->orig_nents, i) {
                sg_set_page(dst, sg_page(src), src->length, 0);
                dst = sg_next(dst);
-               src = sg_next(src);
        }
 
-       ret = dma_map_sgtable(attachment->dev, st, dir, DMA_ATTR_SKIP_CPU_SYNC);
+       ret = dma_map_sgtable(attach->dev, sgt, dir, DMA_ATTR_SKIP_CPU_SYNC);
        if (ret)
                goto err_free_sg;
 
-       return st;
+       return sgt;
 
 err_free_sg:
-       sg_free_table(st);
+       sg_free_table(sgt);
 err_free:
-       kfree(st);
+       kfree(sgt);
 err:
        return ERR_PTR(ret);
 }
@@ -236,15 +237,15 @@ struct dma_buf *i915_gem_prime_export(struct drm_gem_object *gem_obj, int flags)
 static int i915_gem_object_get_pages_dmabuf(struct drm_i915_gem_object *obj)
 {
        struct drm_i915_private *i915 = to_i915(obj->base.dev);
-       struct sg_table *pages;
+       struct sg_table *sgt;
        unsigned int sg_page_sizes;
 
        assert_object_held(obj);
 
-       pages = dma_buf_map_attachment(obj->base.import_attach,
-                                      DMA_BIDIRECTIONAL);
-       if (IS_ERR(pages))
-               return PTR_ERR(pages);
+       sgt = dma_buf_map_attachment(obj->base.import_attach,
+                                    DMA_BIDIRECTIONAL);
+       if (IS_ERR(sgt))
+               return PTR_ERR(sgt);
 
        /*
         * DG1 is special here since it still snoops transactions even with
@@ -261,16 +262,16 @@ static int i915_gem_object_get_pages_dmabuf(struct drm_i915_gem_object *obj)
            (!HAS_LLC(i915) && !IS_DG1(i915)))
                wbinvd_on_all_cpus();
 
-       sg_page_sizes = i915_sg_dma_sizes(pages->sgl);
-       __i915_gem_object_set_pages(obj, pages, sg_page_sizes);
+       sg_page_sizes = i915_sg_dma_sizes(sgt->sgl);
+       __i915_gem_object_set_pages(obj, sgt, sg_page_sizes);
 
        return 0;
 }
 
 static void i915_gem_object_put_pages_dmabuf(struct drm_i915_gem_object *obj,
-                                            struct sg_table *pages)
+                                            struct sg_table *sgt)
 {
-       dma_buf_unmap_attachment(obj->base.import_attach, pages,
+       dma_buf_unmap_attachment(obj->base.import_attach, sgt,
                                 DMA_BIDIRECTIONAL);
 }
 
@@ -313,7 +314,7 @@ struct drm_gem_object *i915_gem_prime_import(struct drm_device *dev,
        get_dma_buf(dma_buf);
 
        obj = i915_gem_object_alloc();
-       if (obj == NULL) {
+       if (!obj) {
                ret = -ENOMEM;
                goto fail_detach;
        }
index 845023c14eb36f1b525d943fe632e58da95f82d8..1160723c9d2d94e0b0cf272ff37664910bf6b2c0 100644 (file)
@@ -2954,11 +2954,6 @@ await_fence_array(struct i915_execbuffer *eb,
        int err;
 
        for (n = 0; n < eb->num_fences; n++) {
-               struct drm_syncobj *syncobj;
-               unsigned int flags;
-
-               syncobj = ptr_unpack_bits(eb->fences[n].syncobj, &flags, 2);
-
                if (!eb->fences[n].dma_fence)
                        continue;
 
index c698f95af15fee0af3b5f04dd06799643474eed3..629acb403a2c975971a1406822b75c7e38961b75 100644 (file)
@@ -6,7 +6,6 @@
 
 #include <linux/scatterlist.h>
 #include <linux/slab.h>
-#include <linux/swiotlb.h>
 
 #include "i915_drv.h"
 #include "i915_gem.h"
@@ -38,22 +37,12 @@ static int i915_gem_object_get_pages_internal(struct drm_i915_gem_object *obj)
        struct scatterlist *sg;
        unsigned int sg_page_sizes;
        unsigned int npages;
-       int max_order;
+       int max_order = MAX_ORDER;
+       unsigned int max_segment;
        gfp_t gfp;
 
-       max_order = MAX_ORDER;
-#ifdef CONFIG_SWIOTLB
-       if (is_swiotlb_active(obj->base.dev->dev)) {
-               unsigned int max_segment;
-
-               max_segment = swiotlb_max_segment();
-               if (max_segment) {
-                       max_segment = max_t(unsigned int, max_segment,
-                                           PAGE_SIZE) >> PAGE_SHIFT;
-                       max_order = min(max_order, ilog2(max_segment));
-               }
-       }
-#endif
+       max_segment = i915_sg_segment_size(i915->drm.dev) >> PAGE_SHIFT;
+       max_order = min(max_order, get_order(max_segment));
 
        gfp = GFP_KERNEL | __GFP_HIGHMEM | __GFP_RECLAIMABLE;
        if (IS_I965GM(i915) || IS_I965G(i915)) {
index 73d9eda1d6b7a6fdd237da6bff783e67495648cc..e63329bc80659cae184e66ed3f13755e044e0c84 100644 (file)
@@ -413,7 +413,7 @@ retry:
        vma->mmo = mmo;
 
        if (CONFIG_DRM_I915_USERFAULT_AUTOSUSPEND)
-               intel_wakeref_auto(&to_gt(i915)->userfault_wakeref,
+               intel_wakeref_auto(&i915->runtime_pm.userfault_wakeref,
                                   msecs_to_jiffies_timeout(CONFIG_DRM_I915_USERFAULT_AUTOSUSPEND));
 
        if (write) {
@@ -557,11 +557,13 @@ void i915_gem_object_runtime_pm_release_mmap_offset(struct drm_i915_gem_object *
 
        drm_vma_node_unmap(&bo->base.vma_node, bdev->dev_mapping);
 
-       if (obj->userfault_count) {
-               /* rpm wakeref provide exclusive access */
-               list_del(&obj->userfault_link);
-               obj->userfault_count = 0;
-       }
+       /*
+        * We have exclusive access here via runtime suspend. All other callers
+        * must first grab the rpm wakeref.
+        */
+       GEM_BUG_ON(!obj->userfault_count);
+       list_del(&obj->userfault_link);
+       obj->userfault_count = 0;
 }
 
 void i915_gem_object_release_mmap_offset(struct drm_i915_gem_object *obj)
@@ -587,13 +589,6 @@ void i915_gem_object_release_mmap_offset(struct drm_i915_gem_object *obj)
                spin_lock(&obj->mmo.lock);
        }
        spin_unlock(&obj->mmo.lock);
-
-       if (obj->userfault_count) {
-               mutex_lock(&to_gt(to_i915(obj->base.dev))->lmem_userfault_lock);
-               list_del(&obj->userfault_link);
-               mutex_unlock(&to_gt(to_i915(obj->base.dev))->lmem_userfault_lock);
-               obj->userfault_count = 0;
-       }
 }
 
 static struct i915_mmap_offset *
index 6b8710ba8ded80ddd425ca955988def5f23faedd..733696057761c9fcb77dd51d7b5b0556466de0cf 100644 (file)
@@ -458,6 +458,16 @@ i915_gem_object_read_from_page_iomap(struct drm_i915_gem_object *obj, u64 offset
        io_mapping_unmap(src_map);
 }
 
+static bool object_has_mappable_iomem(struct drm_i915_gem_object *obj)
+{
+       GEM_BUG_ON(!i915_gem_object_has_iomem(obj));
+
+       if (IS_DGFX(to_i915(obj->base.dev)))
+               return i915_ttm_resource_mappable(i915_gem_to_ttm(obj)->resource);
+
+       return true;
+}
+
 /**
  * i915_gem_object_read_from_page - read data from the page of a GEM object
  * @obj: GEM object to read from
@@ -480,7 +490,7 @@ int i915_gem_object_read_from_page(struct drm_i915_gem_object *obj, u64 offset,
 
        if (i915_gem_object_has_struct_page(obj))
                i915_gem_object_read_from_page_kmap(obj, offset, dst, size);
-       else if (i915_gem_object_has_iomem(obj))
+       else if (i915_gem_object_has_iomem(obj) && object_has_mappable_iomem(obj))
                i915_gem_object_read_from_page_iomap(obj, offset, dst, size);
        else
                return -ENODEV;
index 1723af9b0f6a223c611e2efd2218af105216cc80..6b9ecff42bb5cae980dd1993180f5967738202e6 100644 (file)
@@ -482,6 +482,10 @@ void *__must_check i915_gem_object_pin_map(struct drm_i915_gem_object *obj,
 void *__must_check i915_gem_object_pin_map_unlocked(struct drm_i915_gem_object *obj,
                                                    enum i915_map_type type);
 
+enum i915_map_type i915_coherent_map_type(struct drm_i915_private *i915,
+                                         struct drm_i915_gem_object *obj,
+                                         bool always_coherent);
+
 void __i915_gem_object_flush_map(struct drm_i915_gem_object *obj,
                                 unsigned long offset,
                                 unsigned long size);
index 4df50b049ceaf1c30c4ac958811e713c5ae7f4df..16f845663ff2cf1a709b4e3c8daea0eba45381eb 100644 (file)
@@ -466,6 +466,18 @@ void *i915_gem_object_pin_map_unlocked(struct drm_i915_gem_object *obj,
        return ret;
 }
 
+enum i915_map_type i915_coherent_map_type(struct drm_i915_private *i915,
+                                         struct drm_i915_gem_object *obj,
+                                         bool always_coherent)
+{
+       if (i915_gem_object_is_lmem(obj))
+               return I915_MAP_WC;
+       if (HAS_LLC(i915) || always_coherent)
+               return I915_MAP_WB;
+       else
+               return I915_MAP_WC;
+}
+
 void __i915_gem_object_flush_map(struct drm_i915_gem_object *obj,
                                 unsigned long offset,
                                 unsigned long size)
index 3428f735e786c01d9a0dfc214312678dd7fea229..0d812f4d787d7418d89d60f809d5b94179cc0133 100644 (file)
 
 void i915_gem_suspend(struct drm_i915_private *i915)
 {
+       struct intel_gt *gt;
+       unsigned int i;
+
        GEM_TRACE("%s\n", dev_name(i915->drm.dev));
 
-       intel_wakeref_auto(&to_gt(i915)->userfault_wakeref, 0);
+       intel_wakeref_auto(&i915->runtime_pm.userfault_wakeref, 0);
        flush_workqueue(i915->wq);
 
        /*
@@ -36,7 +39,8 @@ void i915_gem_suspend(struct drm_i915_private *i915)
         * state. Fortunately, the kernel_context is disposable and we do
         * not rely on its state.
         */
-       intel_gt_suspend_prepare(to_gt(i915));
+       for_each_gt(gt, i915, i)
+               intel_gt_suspend_prepare(gt);
 
        i915_gem_drain_freed_objects(i915);
 }
@@ -131,7 +135,9 @@ void i915_gem_suspend_late(struct drm_i915_private *i915)
                &i915->mm.purge_list,
                NULL
        }, **phase;
+       struct intel_gt *gt;
        unsigned long flags;
+       unsigned int i;
        bool flush = false;
 
        /*
@@ -154,7 +160,8 @@ void i915_gem_suspend_late(struct drm_i915_private *i915)
         * machine in an unusable condition.
         */
 
-       intel_gt_suspend_late(to_gt(i915));
+       for_each_gt(gt, i915, i)
+               intel_gt_suspend_late(gt);
 
        spin_lock_irqsave(&i915->mm.obj_lock, flags);
        for (phase = phases; *phase; phase++) {
@@ -212,7 +219,8 @@ int i915_gem_freeze_late(struct drm_i915_private *i915)
 
 void i915_gem_resume(struct drm_i915_private *i915)
 {
-       int ret;
+       struct intel_gt *gt;
+       int ret, i, j;
 
        GEM_TRACE("%s\n", dev_name(i915->drm.dev));
 
@@ -224,8 +232,25 @@ void i915_gem_resume(struct drm_i915_private *i915)
         * guarantee that the context image is complete. So let's just reset
         * it and start again.
         */
-       intel_gt_resume(to_gt(i915));
+       for_each_gt(gt, i915, i)
+               if (intel_gt_resume(gt))
+                       goto err_wedged;
 
        ret = lmem_restore(i915, I915_TTM_BACKUP_ALLOW_GPU);
        GEM_WARN_ON(ret);
+
+       return;
+
+err_wedged:
+       for_each_gt(gt, i915, j) {
+               if (!intel_gt_is_wedged(gt)) {
+                       dev_err(i915->drm.dev,
+                               "Failed to re-initialize GPU[%u], declaring it wedged!\n",
+                               j);
+                       intel_gt_set_wedged(gt);
+               }
+
+               if (j == i)
+                       break;
+       }
 }
index f42ca1179f3732e9aeb9158c2e295fda30ae1eea..2f7804492cd5cb6df70c284d9ec0e033af4f9829 100644 (file)
@@ -194,7 +194,7 @@ static int shmem_get_pages(struct drm_i915_gem_object *obj)
        struct intel_memory_region *mem = obj->mm.region;
        struct address_space *mapping = obj->base.filp->f_mapping;
        const unsigned long page_count = obj->base.size / PAGE_SIZE;
-       unsigned int max_segment = i915_sg_segment_size();
+       unsigned int max_segment = i915_sg_segment_size(i915->drm.dev);
        struct sg_table *st;
        struct sgt_iter sgt_iter;
        struct page *page;
@@ -369,14 +369,14 @@ __i915_gem_object_release_shmem(struct drm_i915_gem_object *obj,
 
        __start_cpu_write(obj);
        /*
-        * On non-LLC platforms, force the flush-on-acquire if this is ever
+        * On non-LLC igfx platforms, force the flush-on-acquire if this is ever
         * swapped-in. Our async flush path is not trustworthy enough yet (and
         * happens in the wrong order), and with some tricks it's conceivable
         * for userspace to change the cache-level to I915_CACHE_NONE after the
         * pages are swapped-in, and since execbuf binds the object before doing
         * the async flush, we have a race window.
         */
-       if (!HAS_LLC(i915))
+       if (!HAS_LLC(i915) && !IS_DGFX(i915))
                obj->cache_dirty = true;
 }
 
index acc561c0f0aa2a2a72f98502cfe3ab2f708e4b74..0c70711818edd2354e580aaeb9c46b2503697ae3 100644 (file)
@@ -77,22 +77,26 @@ void i915_gem_stolen_remove_node(struct drm_i915_private *i915,
        mutex_unlock(&i915->mm.stolen_lock);
 }
 
-static int i915_adjust_stolen(struct drm_i915_private *i915,
-                             struct resource *dsm)
+static bool valid_stolen_size(struct drm_i915_private *i915, struct resource *dsm)
+{
+       return (dsm->start != 0 || HAS_LMEMBAR_SMEM_STOLEN(i915)) && dsm->end > dsm->start;
+}
+
+static int adjust_stolen(struct drm_i915_private *i915,
+                        struct resource *dsm)
 {
        struct i915_ggtt *ggtt = to_gt(i915)->ggtt;
        struct intel_uncore *uncore = ggtt->vm.gt->uncore;
-       struct resource *r;
 
-       if (dsm->start == 0 || dsm->end <= dsm->start)
+       if (!valid_stolen_size(i915, dsm))
                return -EINVAL;
 
        /*
+        * Make sure we don't clobber the GTT if it's within stolen memory
+        *
         * TODO: We have yet to encounter the case where the GTT wasn't at the
         * end of stolen. With that assumption we could simplify this.
         */
-
-       /* Make sure we don't clobber the GTT if it's within stolen memory */
        if (GRAPHICS_VER(i915) <= 4 &&
            !IS_G33(i915) && !IS_PINEVIEW(i915) && !IS_G4X(i915)) {
                struct resource stolen[2] = {*dsm, *dsm};
@@ -131,12 +135,25 @@ static int i915_adjust_stolen(struct drm_i915_private *i915,
                }
        }
 
+       if (!valid_stolen_size(i915, dsm))
+               return -EINVAL;
+
+       return 0;
+}
+
+static int request_smem_stolen(struct drm_i915_private *i915,
+                              struct resource *dsm)
+{
+       struct resource *r;
+
        /*
-        * With stolen lmem, we don't need to check if the address range
-        * overlaps with the non-stolen system memory range, since lmem is local
-        * to the gpu.
+        * With stolen lmem, we don't need to request system memory for the
+        * address range since it's local to the gpu.
+        *
+        * Starting MTL, in IGFX devices the stolen memory is exposed via
+        * LMEMBAR and shall be considered similar to stolen lmem.
         */
-       if (HAS_LMEM(i915))
+       if (HAS_LMEM(i915) || HAS_LMEMBAR_SMEM_STOLEN(i915))
                return 0;
 
        /*
@@ -371,8 +388,6 @@ static void icl_get_stolen_reserved(struct drm_i915_private *i915,
 
        drm_dbg(&i915->drm, "GEN6_STOLEN_RESERVED = 0x%016llx\n", reg_val);
 
-       *base = reg_val & GEN11_STOLEN_RESERVED_ADDR_MASK;
-
        switch (reg_val & GEN8_STOLEN_RESERVED_SIZE_MASK) {
        case GEN8_STOLEN_RESERVED_1M:
                *size = 1024 * 1024;
@@ -390,41 +405,30 @@ static void icl_get_stolen_reserved(struct drm_i915_private *i915,
                *size = 8 * 1024 * 1024;
                MISSING_CASE(reg_val & GEN8_STOLEN_RESERVED_SIZE_MASK);
        }
+
+       if (HAS_LMEMBAR_SMEM_STOLEN(i915))
+               /* the base is initialized to stolen top so subtract size to get base */
+               *base -= *size;
+       else
+               *base = reg_val & GEN11_STOLEN_RESERVED_ADDR_MASK;
 }
 
-static int i915_gem_init_stolen(struct intel_memory_region *mem)
+/*
+ * Initialize i915->dsm_reserved to contain the reserved space within the Data
+ * Stolen Memory. This is a range on the top of DSM that is reserved, not to
+ * be used by the driver, so it must be excluded from the region passed to the
+ * allocator later. In the spec this is also called WOPCM.
+ *
+ * Our expectation is that the reserved space is at the top of the stolen
+ * region, as it has been the case for every platform, and *never* at the
+ * bottom, so the calculation here can be simplified.
+ */
+static int init_reserved_stolen(struct drm_i915_private *i915)
 {
-       struct drm_i915_private *i915 = mem->i915;
        struct intel_uncore *uncore = &i915->uncore;
        resource_size_t reserved_base, stolen_top;
-       resource_size_t reserved_total, reserved_size;
-
-       mutex_init(&i915->mm.stolen_lock);
-
-       if (intel_vgpu_active(i915)) {
-               drm_notice(&i915->drm,
-                          "%s, disabling use of stolen memory\n",
-                          "iGVT-g active");
-               return 0;
-       }
-
-       if (i915_vtd_active(i915) && GRAPHICS_VER(i915) < 8) {
-               drm_notice(&i915->drm,
-                          "%s, disabling use of stolen memory\n",
-                          "DMAR active");
-               return 0;
-       }
-
-       if (resource_size(&mem->region) == 0)
-               return 0;
-
-       i915->dsm = mem->region;
-
-       if (i915_adjust_stolen(i915, &i915->dsm))
-               return 0;
-
-       GEM_BUG_ON(i915->dsm.start == 0);
-       GEM_BUG_ON(i915->dsm.end <= i915->dsm.start);
+       resource_size_t reserved_size;
+       int ret = 0;
 
        stolen_top = i915->dsm.end + 1;
        reserved_base = stolen_top;
@@ -455,17 +459,16 @@ static int i915_gem_init_stolen(struct intel_memory_region *mem)
                                        &reserved_base, &reserved_size);
        }
 
-       /*
-        * Our expectation is that the reserved space is at the top of the
-        * stolen region and *never* at the bottom. If we see !reserved_base,
-        * it likely means we failed to read the registers correctly.
-        */
+       /* No reserved stolen */
+       if (reserved_base == stolen_top)
+               goto bail_out;
+
        if (!reserved_base) {
                drm_err(&i915->drm,
                        "inconsistent reservation %pa + %pa; ignoring\n",
                        &reserved_base, &reserved_size);
-               reserved_base = stolen_top;
-               reserved_size = 0;
+               ret = -EINVAL;
+               goto bail_out;
        }
 
        i915->dsm_reserved =
@@ -475,19 +478,55 @@ static int i915_gem_init_stolen(struct intel_memory_region *mem)
                drm_err(&i915->drm,
                        "Stolen reserved area %pR outside stolen memory %pR\n",
                        &i915->dsm_reserved, &i915->dsm);
-               return 0;
+               ret = -EINVAL;
+               goto bail_out;
        }
 
+       return 0;
+
+bail_out:
+       i915->dsm_reserved =
+               (struct resource)DEFINE_RES_MEM(reserved_base, 0);
+
+       return ret;
+}
+
+static int i915_gem_init_stolen(struct intel_memory_region *mem)
+{
+       struct drm_i915_private *i915 = mem->i915;
+
+       mutex_init(&i915->mm.stolen_lock);
+
+       if (intel_vgpu_active(i915)) {
+               drm_notice(&i915->drm,
+                          "%s, disabling use of stolen memory\n",
+                          "iGVT-g active");
+               return -ENOSPC;
+       }
+
+       if (i915_vtd_active(i915) && GRAPHICS_VER(i915) < 8) {
+               drm_notice(&i915->drm,
+                          "%s, disabling use of stolen memory\n",
+                          "DMAR active");
+               return -ENOSPC;
+       }
+
+       if (adjust_stolen(i915, &mem->region))
+               return -ENOSPC;
+
+       if (request_smem_stolen(i915, &mem->region))
+               return -ENOSPC;
+
+       i915->dsm = mem->region;
+
+       if (init_reserved_stolen(i915))
+               return -ENOSPC;
+
        /* Exclude the reserved region from driver use */
-       mem->region.end = reserved_base - 1;
+       mem->region.end = i915->dsm_reserved.start - 1;
        mem->io_size = min(mem->io_size, resource_size(&mem->region));
 
-       /* It is possible for the reserved area to end before the end of stolen
-        * memory, so just consider the start. */
-       reserved_total = stolen_top - reserved_base;
-
-       i915->stolen_usable_size =
-               resource_size(&i915->dsm) - reserved_total;
+       i915->stolen_usable_size = resource_size(&mem->region);
 
        drm_dbg(&i915->drm,
                "Memory reserved for graphics device: %lluK, usable: %lluK\n",
@@ -495,7 +534,7 @@ static int i915_gem_init_stolen(struct intel_memory_region *mem)
                (u64)i915->stolen_usable_size >> 10);
 
        if (i915->stolen_usable_size == 0)
-               return 0;
+               return -ENOSPC;
 
        /* Basic memrange allocator for stolen space. */
        drm_mm_init(&i915->mm.stolen, 0, i915->stolen_usable_size);
@@ -733,11 +772,17 @@ i915_gem_object_create_stolen(struct drm_i915_private *i915,
 
 static int init_stolen_smem(struct intel_memory_region *mem)
 {
+       int err;
+
        /*
         * Initialise stolen early so that we may reserve preallocated
         * objects for the BIOS to KMS transition.
         */
-       return i915_gem_init_stolen(mem);
+       err = i915_gem_init_stolen(mem);
+       if (err)
+               drm_dbg(&mem->i915->drm, "Skip stolen region: failed to setup\n");
+
+       return 0;
 }
 
 static int release_stolen_smem(struct intel_memory_region *mem)
@@ -754,26 +799,25 @@ static const struct intel_memory_region_ops i915_region_stolen_smem_ops = {
 
 static int init_stolen_lmem(struct intel_memory_region *mem)
 {
+       struct drm_i915_private *i915 = mem->i915;
        int err;
 
        if (GEM_WARN_ON(resource_size(&mem->region) == 0))
-               return -ENODEV;
+               return 0;
 
-       /*
-        * TODO: For stolen lmem we mostly just care about populating the dsm
-        * related bits and setting up the drm_mm allocator for the range.
-        * Perhaps split up i915_gem_init_stolen() for this.
-        */
        err = i915_gem_init_stolen(mem);
-       if (err)
-               return err;
+       if (err) {
+               drm_dbg(&mem->i915->drm, "Skip stolen region: failed to setup\n");
+               return 0;
+       }
 
-       if (mem->io_size && !io_mapping_init_wc(&mem->iomap,
-                                               mem->io_start,
-                                               mem->io_size)) {
-               err = -EIO;
+       if (mem->io_size &&
+           !io_mapping_init_wc(&mem->iomap, mem->io_start, mem->io_size))
                goto err_cleanup;
-       }
+
+       drm_dbg(&i915->drm, "Stolen Local memory IO start: %pa\n",
+               &mem->io_start);
+       drm_dbg(&i915->drm, "Stolen Local DSM base: %pa\n", &mem->region.start);
 
        return 0;
 
@@ -796,6 +840,29 @@ static const struct intel_memory_region_ops i915_region_stolen_lmem_ops = {
        .init_object = _i915_gem_object_stolen_init,
 };
 
+static int mtl_get_gms_size(struct intel_uncore *uncore)
+{
+       u16 ggc, gms;
+
+       ggc = intel_uncore_read16(uncore, GGC);
+
+       /* check GGMS, should be fixed 0x3 (8MB) */
+       if ((ggc & GGMS_MASK) != GGMS_MASK)
+               return -EIO;
+
+       /* return valid GMS value, -EIO if invalid */
+       gms = REG_FIELD_GET(GMS_MASK, ggc);
+       switch (gms) {
+       case 0x0 ... 0x04:
+               return gms * 32;
+       case 0xf0 ... 0xfe:
+               return (gms - 0xf0 + 1) * 4;
+       default:
+               MISSING_CASE(gms);
+               return -EIO;
+       }
+}
+
 struct intel_memory_region *
 i915_gem_stolen_lmem_setup(struct drm_i915_private *i915, u16 type,
                           u16 instance)
@@ -806,6 +873,7 @@ i915_gem_stolen_lmem_setup(struct drm_i915_private *i915, u16 type,
        struct intel_memory_region *mem;
        resource_size_t io_start, io_size;
        resource_size_t min_page_size;
+       int ret;
 
        if (WARN_ON_ONCE(instance))
                return ERR_PTR(-ENODEV);
@@ -813,12 +881,8 @@ i915_gem_stolen_lmem_setup(struct drm_i915_private *i915, u16 type,
        if (!i915_pci_resource_valid(pdev, GEN12_LMEM_BAR))
                return ERR_PTR(-ENXIO);
 
-       /* Use DSM base address instead for stolen memory */
-       dsm_base = intel_uncore_read64(uncore, GEN12_DSMBASE);
-       if (IS_DG1(uncore->i915)) {
+       if (HAS_LMEMBAR_SMEM_STOLEN(i915) || IS_DG1(i915)) {
                lmem_size = pci_resource_len(pdev, GEN12_LMEM_BAR);
-               if (WARN_ON(lmem_size < dsm_base))
-                       return ERR_PTR(-ENODEV);
        } else {
                resource_size_t lmem_range;
 
@@ -827,13 +891,39 @@ i915_gem_stolen_lmem_setup(struct drm_i915_private *i915, u16 type,
                lmem_size *= SZ_1G;
        }
 
-       dsm_size = lmem_size - dsm_base;
-       if (pci_resource_len(pdev, GEN12_LMEM_BAR) < lmem_size) {
+       if (HAS_LMEMBAR_SMEM_STOLEN(i915)) {
+               /*
+                * MTL dsm size is in GGC register.
+                * Also MTL uses offset to DSMBASE in ptes, so i915
+                * uses dsm_base = 0 to setup stolen region.
+                */
+               ret = mtl_get_gms_size(uncore);
+               if (ret < 0) {
+                       drm_err(&i915->drm, "invalid MTL GGC register setting\n");
+                       return ERR_PTR(ret);
+               }
+
+               dsm_base = 0;
+               dsm_size = (resource_size_t)(ret * SZ_1M);
+
+               GEM_BUG_ON(pci_resource_len(pdev, GEN12_LMEM_BAR) != SZ_256M);
+               GEM_BUG_ON((dsm_size + SZ_8M) > lmem_size);
+       } else {
+               /* Use DSM base address instead for stolen memory */
+               dsm_base = intel_uncore_read64(uncore, GEN12_DSMBASE) & GEN12_BDSM_MASK;
+               if (WARN_ON(lmem_size < dsm_base))
+                       return ERR_PTR(-ENODEV);
+               dsm_size = lmem_size - dsm_base;
+       }
+
+       io_size = dsm_size;
+       if (HAS_LMEMBAR_SMEM_STOLEN(i915)) {
+               io_start = pci_resource_start(pdev, GEN12_LMEM_BAR) + SZ_8M;
+       } else if (pci_resource_len(pdev, GEN12_LMEM_BAR) < lmem_size) {
                io_start = 0;
                io_size = 0;
        } else {
                io_start = pci_resource_start(pdev, GEN12_LMEM_BAR) + dsm_base;
-               io_size = dsm_size;
        }
 
        min_page_size = HAS_64K_PAGES(i915) ? I915_GTT_PAGE_SIZE_64K :
@@ -847,16 +937,6 @@ i915_gem_stolen_lmem_setup(struct drm_i915_private *i915, u16 type,
        if (IS_ERR(mem))
                return mem;
 
-       /*
-        * TODO: consider creating common helper to just print all the
-        * interesting stuff from intel_memory_region, which we can use for all
-        * our probed regions.
-        */
-
-       drm_dbg(&i915->drm, "Stolen Local memory IO start: %pa\n",
-               &mem->io_start);
-       drm_dbg(&i915->drm, "Stolen Local DSM base: %pa\n", &dsm_base);
-
        intel_memory_region_set_name(mem, "stolen-local");
 
        mem->private = true;
@@ -881,6 +961,7 @@ i915_gem_stolen_smem_setup(struct drm_i915_private *i915, u16 type,
        intel_memory_region_set_name(mem, "stolen-system");
 
        mem->private = true;
+
        return mem;
 }
 
index 7a1e92c11946431f3c2a869b7671bad695fbfe09..25129af70f70ffff8b20f1902702835a3c4725c0 100644 (file)
@@ -189,7 +189,7 @@ static int i915_ttm_tt_shmem_populate(struct ttm_device *bdev,
        struct drm_i915_private *i915 = container_of(bdev, typeof(*i915), bdev);
        struct intel_memory_region *mr = i915->mm.regions[INTEL_MEMORY_SYSTEM];
        struct i915_ttm_tt *i915_tt = container_of(ttm, typeof(*i915_tt), ttm);
-       const unsigned int max_segment = i915_sg_segment_size();
+       const unsigned int max_segment = i915_sg_segment_size(i915->drm.dev);
        const size_t size = (size_t)ttm->num_pages << PAGE_SHIFT;
        struct file *filp = i915_tt->filp;
        struct sgt_iter sgt_iter;
@@ -279,7 +279,7 @@ static struct ttm_tt *i915_ttm_tt_create(struct ttm_buffer_object *bo,
        struct i915_ttm_tt *i915_tt;
        int ret;
 
-       if (!obj)
+       if (i915_ttm_is_ghost_object(bo))
                return NULL;
 
        i915_tt = kzalloc(sizeof(*i915_tt), GFP_KERNEL);
@@ -362,7 +362,7 @@ static bool i915_ttm_eviction_valuable(struct ttm_buffer_object *bo,
 {
        struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo);
 
-       if (!obj)
+       if (i915_ttm_is_ghost_object(bo))
                return false;
 
        /*
@@ -509,18 +509,9 @@ static int i915_ttm_shrink(struct drm_i915_gem_object *obj, unsigned int flags)
 static void i915_ttm_delete_mem_notify(struct ttm_buffer_object *bo)
 {
        struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo);
-       intel_wakeref_t wakeref = 0;
-
-       if (bo->resource && likely(obj)) {
-               /* ttm_bo_release() already has dma_resv_lock */
-               if (i915_ttm_cpu_maps_iomem(bo->resource))
-                       wakeref = intel_runtime_pm_get(&to_i915(obj->base.dev)->runtime_pm);
 
+       if (bo->resource && !i915_ttm_is_ghost_object(bo)) {
                __i915_gem_object_pages_fini(obj);
-
-               if (wakeref)
-                       intel_runtime_pm_put(&to_i915(obj->base.dev)->runtime_pm, wakeref);
-
                i915_ttm_free_cached_io_rsgt(obj);
        }
 }
@@ -538,7 +529,7 @@ static struct i915_refct_sgt *i915_ttm_tt_get_st(struct ttm_tt *ttm)
        ret = sg_alloc_table_from_pages_segment(st,
                        ttm->pages, ttm->num_pages,
                        0, (unsigned long)ttm->num_pages << PAGE_SHIFT,
-                       i915_sg_segment_size(), GFP_KERNEL);
+                       i915_sg_segment_size(i915_tt->dev), GFP_KERNEL);
        if (ret) {
                st->sgl = NULL;
                return ERR_PTR(ret);
@@ -624,7 +615,7 @@ static void i915_ttm_swap_notify(struct ttm_buffer_object *bo)
        struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo);
        int ret;
 
-       if (!obj)
+       if (i915_ttm_is_ghost_object(bo))
                return;
 
        ret = i915_ttm_move_notify(bo);
@@ -657,7 +648,7 @@ static int i915_ttm_io_mem_reserve(struct ttm_device *bdev, struct ttm_resource
        struct drm_i915_gem_object *obj = i915_ttm_to_gem(mem->bo);
        bool unknown_state;
 
-       if (!obj)
+       if (i915_ttm_is_ghost_object(mem->bo))
                return -EINVAL;
 
        if (!kref_get_unless_zero(&obj->base.refcount))
@@ -690,7 +681,7 @@ static unsigned long i915_ttm_io_mem_pfn(struct ttm_buffer_object *bo,
        unsigned long base;
        unsigned int ofs;
 
-       GEM_BUG_ON(!obj);
+       GEM_BUG_ON(i915_ttm_is_ghost_object(bo));
        GEM_WARN_ON(bo->ttm);
 
        base = obj->mm.region->iomap.base - obj->mm.region->region.start;
@@ -699,6 +690,50 @@ static unsigned long i915_ttm_io_mem_pfn(struct ttm_buffer_object *bo,
        return ((base + sg_dma_address(sg)) >> PAGE_SHIFT) + ofs;
 }
 
+static int i915_ttm_access_memory(struct ttm_buffer_object *bo,
+                                 unsigned long offset, void *buf,
+                                 int len, int write)
+{
+       struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo);
+       resource_size_t iomap = obj->mm.region->iomap.base -
+               obj->mm.region->region.start;
+       unsigned long page = offset >> PAGE_SHIFT;
+       unsigned long bytes_left = len;
+
+       /*
+        * TODO: For now just let it fail if the resource is non-mappable,
+        * otherwise we need to perform the memcpy from the gpu here, without
+        * interfering with the object (like moving the entire thing).
+        */
+       if (!i915_ttm_resource_mappable(bo->resource))
+               return -EIO;
+
+       offset -= page << PAGE_SHIFT;
+       do {
+               unsigned long bytes = min(bytes_left, PAGE_SIZE - offset);
+               void __iomem *ptr;
+               dma_addr_t daddr;
+
+               daddr = i915_gem_object_get_dma_address(obj, page);
+               ptr = ioremap_wc(iomap + daddr + offset, bytes);
+               if (!ptr)
+                       return -EIO;
+
+               if (write)
+                       memcpy_toio(ptr, buf, bytes);
+               else
+                       memcpy_fromio(buf, ptr, bytes);
+               iounmap(ptr);
+
+               page++;
+               buf += bytes;
+               bytes_left -= bytes;
+               offset = 0;
+       } while (bytes_left);
+
+       return len;
+}
+
 /*
  * All callbacks need to take care not to downcast a struct ttm_buffer_object
  * without checking its subclass, since it might be a TTM ghost object.
@@ -715,6 +750,7 @@ static struct ttm_device_funcs i915_ttm_bo_driver = {
        .delete_mem_notify = i915_ttm_delete_mem_notify,
        .io_mem_reserve = i915_ttm_io_mem_reserve,
        .io_mem_pfn = i915_ttm_io_mem_pfn,
+       .access_memory = i915_ttm_access_memory,
 };
 
 /**
@@ -990,13 +1026,12 @@ static vm_fault_t vm_fault_ttm(struct vm_fault *vmf)
        struct vm_area_struct *area = vmf->vma;
        struct ttm_buffer_object *bo = area->vm_private_data;
        struct drm_device *dev = bo->base.dev;
-       struct drm_i915_gem_object *obj;
+       struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo);
        intel_wakeref_t wakeref = 0;
        vm_fault_t ret;
        int idx;
 
-       obj = i915_ttm_to_gem(bo);
-       if (!obj)
+       if (i915_ttm_is_ghost_object(bo))
                return VM_FAULT_SIGBUS;
 
        /* Sanity check that we allow writing into this object */
@@ -1035,7 +1070,8 @@ static vm_fault_t vm_fault_ttm(struct vm_fault *vmf)
                }
 
                if (err) {
-                       drm_dbg(dev, "Unable to make resource CPU accessible\n");
+                       drm_dbg(dev, "Unable to make resource CPU accessible(err = %pe)\n",
+                               ERR_PTR(err));
                        dma_resv_unlock(bo->base.resv);
                        ret = VM_FAULT_SIGBUS;
                        goto out_rpm;
@@ -1053,16 +1089,19 @@ static vm_fault_t vm_fault_ttm(struct vm_fault *vmf)
        if (ret == VM_FAULT_RETRY && !(vmf->flags & FAULT_FLAG_RETRY_NOWAIT))
                goto out_rpm;
 
-       /* ttm_bo_vm_reserve() already has dma_resv_lock */
+       /*
+        * ttm_bo_vm_reserve() already has dma_resv_lock.
+        * userfault_count is protected by dma_resv lock and rpm wakeref.
+        */
        if (ret == VM_FAULT_NOPAGE && wakeref && !obj->userfault_count) {
                obj->userfault_count = 1;
-               mutex_lock(&to_gt(to_i915(obj->base.dev))->lmem_userfault_lock);
-               list_add(&obj->userfault_link, &to_gt(to_i915(obj->base.dev))->lmem_userfault_list);
-               mutex_unlock(&to_gt(to_i915(obj->base.dev))->lmem_userfault_lock);
+               spin_lock(&to_i915(obj->base.dev)->runtime_pm.lmem_userfault_lock);
+               list_add(&obj->userfault_link, &to_i915(obj->base.dev)->runtime_pm.lmem_userfault_list);
+               spin_unlock(&to_i915(obj->base.dev)->runtime_pm.lmem_userfault_lock);
        }
 
        if (wakeref & CONFIG_DRM_I915_USERFAULT_AUTOSUSPEND)
-               intel_wakeref_auto(&to_gt(to_i915(obj->base.dev))->userfault_wakeref,
+               intel_wakeref_auto(&to_i915(obj->base.dev)->runtime_pm.userfault_wakeref,
                                   msecs_to_jiffies_timeout(CONFIG_DRM_I915_USERFAULT_AUTOSUSPEND));
 
        i915_ttm_adjust_lru(obj);
@@ -1094,7 +1133,7 @@ static void ttm_vm_open(struct vm_area_struct *vma)
        struct drm_i915_gem_object *obj =
                i915_ttm_to_gem(vma->vm_private_data);
 
-       GEM_BUG_ON(!obj);
+       GEM_BUG_ON(i915_ttm_is_ghost_object(vma->vm_private_data));
        i915_gem_object_get(obj);
 }
 
@@ -1103,7 +1142,7 @@ static void ttm_vm_close(struct vm_area_struct *vma)
        struct drm_i915_gem_object *obj =
                i915_ttm_to_gem(vma->vm_private_data);
 
-       GEM_BUG_ON(!obj);
+       GEM_BUG_ON(i915_ttm_is_ghost_object(vma->vm_private_data));
        i915_gem_object_put(obj);
 }
 
@@ -1124,7 +1163,27 @@ static u64 i915_ttm_mmap_offset(struct drm_i915_gem_object *obj)
 
 static void i915_ttm_unmap_virtual(struct drm_i915_gem_object *obj)
 {
+       struct ttm_buffer_object *bo = i915_gem_to_ttm(obj);
+       intel_wakeref_t wakeref = 0;
+
+       assert_object_held_shared(obj);
+
+       if (i915_ttm_cpu_maps_iomem(bo->resource)) {
+               wakeref = intel_runtime_pm_get(&to_i915(obj->base.dev)->runtime_pm);
+
+               /* userfault_count is protected by obj lock and rpm wakeref. */
+               if (obj->userfault_count) {
+                       spin_lock(&to_i915(obj->base.dev)->runtime_pm.lmem_userfault_lock);
+                       list_del(&obj->userfault_link);
+                       spin_unlock(&to_i915(obj->base.dev)->runtime_pm.lmem_userfault_lock);
+                       obj->userfault_count = 0;
+               }
+       }
+
        ttm_bo_unmap_virtual(i915_gem_to_ttm(obj));
+
+       if (wakeref)
+               intel_runtime_pm_put(&to_i915(obj->base.dev)->runtime_pm, wakeref);
 }
 
 static const struct drm_i915_gem_object_ops i915_gem_ttm_obj_ops = {
index e4842b4296fc2123382ccdd63858b67d02901b00..2a94a99ef76b4f4556e5b8b2c79f435cbfc790da 100644 (file)
@@ -27,19 +27,27 @@ i915_gem_to_ttm(struct drm_i915_gem_object *obj)
  */
 void i915_ttm_bo_destroy(struct ttm_buffer_object *bo);
 
+/**
+ * i915_ttm_is_ghost_object - Check if the ttm bo is a ghost object.
+ * @bo: Pointer to the ttm buffer object
+ *
+ * Return: True if the ttm bo is not an i915 object but a ghost ttm object,
+ * False otherwise.
+ */
+static inline bool i915_ttm_is_ghost_object(struct ttm_buffer_object *bo)
+{
+       return bo->destroy != i915_ttm_bo_destroy;
+}
+
 /**
  * i915_ttm_to_gem - Convert a struct ttm_buffer_object to an embedding
  * struct drm_i915_gem_object.
  *
- * Return: Pointer to the embedding struct ttm_buffer_object, or NULL
- * if the object was not an i915 ttm object.
+ * Return: Pointer to the embedding struct ttm_buffer_object.
  */
 static inline struct drm_i915_gem_object *
 i915_ttm_to_gem(struct ttm_buffer_object *bo)
 {
-       if (bo->destroy != i915_ttm_bo_destroy)
-               return NULL;
-
        return container_of(bo, struct drm_i915_gem_object, __do_not_access);
 }
 
index 9a7e50534b84bb9023f1760ec8af90ff50250a34..f59f812dc6d29a6b8451a229b8fa348d0391f4f6 100644 (file)
@@ -560,7 +560,7 @@ int i915_ttm_move(struct ttm_buffer_object *bo, bool evict,
        bool clear;
        int ret;
 
-       if (GEM_WARN_ON(!obj)) {
+       if (GEM_WARN_ON(i915_ttm_is_ghost_object(bo))) {
                ttm_bo_move_null(bo, dst_mem);
                return 0;
        }
index d4398948f01623d7474593eb4fb8de2690136301..1b1a22716722ed5c3d1f886383e709a77807bd54 100644 (file)
@@ -129,7 +129,7 @@ static void i915_gem_object_userptr_drop_ref(struct drm_i915_gem_object *obj)
 static int i915_gem_userptr_get_pages(struct drm_i915_gem_object *obj)
 {
        const unsigned long num_pages = obj->base.size >> PAGE_SHIFT;
-       unsigned int max_segment = i915_sg_segment_size();
+       unsigned int max_segment = i915_sg_segment_size(obj->base.dev->dev);
        struct sg_table *st;
        unsigned int sg_page_sizes;
        struct page **pvec;
@@ -292,7 +292,7 @@ int i915_gem_object_userptr_submit_init(struct drm_i915_gem_object *obj)
        if (!i915_gem_object_is_readonly(obj))
                gup_flags |= FOLL_WRITE;
 
-       pinned = ret = 0;
+       pinned = 0;
        while (pinned < num_pages) {
                ret = pin_user_pages_fast(obj->userptr.ptr + pinned * PAGE_SIZE,
                                          num_pages - pinned, gup_flags,
@@ -302,7 +302,6 @@ int i915_gem_object_userptr_submit_init(struct drm_i915_gem_object *obj)
 
                pinned += ret;
        }
-       ret = 0;
 
        ret = i915_gem_object_lock_interruptible(obj, NULL);
        if (ret)
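With the stray ret assignments gone, the loop is a plain accumulate-until-done pattern around pin_user_pages_fast(), which returns how many pages it pinned on this call or a negative error. Roughly, with the error path (elided from the hunk) sketched in:

            pinned = 0;
            while (pinned < num_pages) {
                    /* pin the remaining tail of the user range */
                    ret = pin_user_pages_fast(obj->userptr.ptr + pinned * PAGE_SIZE,
                                              num_pages - pinned, gup_flags,
                                              &pvec[pinned]);
                    if (ret < 0)
                            goto out;       /* release what was already pinned */

                    pinned += ret;
            }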
index c570cf780079a0a9920b090050130f9b51c7f8f8..0cb99e75b0bc294e08ade9b4ee132c4b24d724b8 100644 (file)
@@ -1161,7 +1161,8 @@ static int igt_write_huge(struct drm_i915_private *i915,
        GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));
 
        size = obj->base.size;
-       if (obj->mm.page_sizes.sg & I915_GTT_PAGE_SIZE_64K)
+       if (obj->mm.page_sizes.sg & I915_GTT_PAGE_SIZE_64K &&
+           !HAS_64K_PAGES(i915))
                size = round_up(size, I915_GTT_PAGE_SIZE_2M);
 
        n = 0;
@@ -1214,6 +1215,10 @@ static int igt_write_huge(struct drm_i915_private *i915,
                 * size and ensure the vma offset is at the start of the pt
                 * boundary, however to improve coverage we opt for testing both
                 * aligned and unaligned offsets.
+                *
+                * With PS64 this is no longer the case, but to ensure we
+                * sometimes get the compact layout for smaller objects, apply
+                * the round_up anyway.
                 */
                if (obj->mm.page_sizes.sg & I915_GTT_PAGE_SIZE_64K)
                        offset_low = round_down(offset_low,
@@ -1411,6 +1416,7 @@ static int igt_ppgtt_sanity_check(void *arg)
                { SZ_2M + SZ_4K,        SZ_64K | SZ_4K  },
                { SZ_2M + SZ_4K,        SZ_2M  | SZ_4K  },
                { SZ_2M + SZ_64K,       SZ_2M  | SZ_64K },
+               { SZ_2M + SZ_64K,       SZ_64K          },
        };
        int i, j;
        int err;
@@ -1540,6 +1546,154 @@ out_put:
        return err;
 }
 
+static int igt_ppgtt_mixed(void *arg)
+{
+       struct drm_i915_private *i915 = arg;
+       const unsigned long flags = PIN_OFFSET_FIXED | PIN_USER;
+       struct drm_i915_gem_object *obj, *on;
+       struct i915_gem_engines *engines;
+       struct i915_gem_engines_iter it;
+       struct i915_address_space *vm;
+       struct i915_gem_context *ctx;
+       struct intel_context *ce;
+       struct file *file;
+       I915_RND_STATE(prng);
+       LIST_HEAD(objects);
+       struct intel_memory_region *mr;
+       struct i915_vma *vma;
+       unsigned int count;
+       u32 i, addr;
+       int *order;
+       int n, err;
+
+       /*
+        * Sanity check mixing 4K and 64K pages within the same page-table via
+        * the new PS64 TLB hint.
+        */
+
+       if (!HAS_64K_PAGES(i915)) {
+               pr_info("device lacks PS64, skipping\n");
+               return 0;
+       }
+
+       file = mock_file(i915);
+       if (IS_ERR(file))
+               return PTR_ERR(file);
+
+       ctx = hugepage_ctx(i915, file);
+       if (IS_ERR(ctx)) {
+               err = PTR_ERR(ctx);
+               goto out;
+       }
+       vm = i915_gem_context_get_eb_vm(ctx);
+
+       i = 0;
+       addr = 0;
+       do {
+               u32 sz;
+
+               sz = i915_prandom_u32_max_state(SZ_4M, &prng);
+               sz = max_t(u32, sz, SZ_4K);
+
+               mr = i915->mm.regions[INTEL_REGION_LMEM_0];
+               if (i & 1)
+                       mr = i915->mm.regions[INTEL_REGION_SMEM];
+
+               obj = i915_gem_object_create_region(mr, sz, 0, 0);
+               if (IS_ERR(obj)) {
+                       err = PTR_ERR(obj);
+                       goto out_vm;
+               }
+
+               list_add_tail(&obj->st_link, &objects);
+
+               vma = i915_vma_instance(obj, vm, NULL);
+               if (IS_ERR(vma)) {
+                       err = PTR_ERR(vma);
+                       goto err_put;
+               }
+
+               addr = round_up(addr, mr->min_page_size);
+               err = i915_vma_pin(vma, 0, 0, addr | flags);
+               if (err)
+                       goto err_put;
+
+               if (mr->type == INTEL_MEMORY_LOCAL &&
+                   (vma->resource->page_sizes_gtt & I915_GTT_PAGE_SIZE_4K)) {
+                       err = -EINVAL;
+                       goto err_put;
+               }
+
+               addr += obj->base.size;
+               i++;
+       } while (addr <= SZ_16M);
+
+       n = 0;
+       count = 0;
+       for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it) {
+               count++;
+               if (!intel_engine_can_store_dword(ce->engine))
+                       continue;
+
+               n++;
+       }
+       i915_gem_context_unlock_engines(ctx);
+       if (!n)
+               goto err_put;
+
+       order = i915_random_order(count * count, &prng);
+       if (!order) {
+               err = -ENOMEM;
+               goto err_put;
+       }
+
+       i = 0;
+       addr = 0;
+       engines = i915_gem_context_lock_engines(ctx);
+       list_for_each_entry(obj, &objects, st_link) {
+               u32 rnd = i915_prandom_u32_max_state(UINT_MAX, &prng);
+
+               addr = round_up(addr, obj->mm.region->min_page_size);
+
+               ce = engines->engines[order[i] % engines->num_engines];
+               i = (i + 1) % (count * count);
+               if (!ce || !intel_engine_can_store_dword(ce->engine))
+                       continue;
+
+               err = __igt_write_huge(ce, obj, obj->base.size, addr, 0, rnd);
+               if (err)
+                       break;
+
+               err = __igt_write_huge(ce, obj, obj->base.size, addr,
+                                      offset_in_page(rnd) / sizeof(u32), rnd + 1);
+               if (err)
+                       break;
+
+               err = __igt_write_huge(ce, obj, obj->base.size, addr,
+                                      (PAGE_SIZE / sizeof(u32)) - 1,
+                                      rnd + 2);
+               if (err)
+                       break;
+
+               addr += obj->base.size;
+
+               cond_resched();
+       }
+
+       i915_gem_context_unlock_engines(ctx);
+       kfree(order);
+err_put:
+       list_for_each_entry_safe(obj, on, &objects, st_link) {
+               list_del(&obj->st_link);
+               i915_gem_object_put(obj);
+       }
+out_vm:
+       i915_vm_put(vm);
+out:
+       fput(file);
+       return err;
+}
+
 static int igt_tmpfs_fallback(void *arg)
 {
        struct drm_i915_private *i915 = arg;
@@ -1803,6 +1957,7 @@ int i915_gem_huge_page_live_selftests(struct drm_i915_private *i915)
                SUBTEST(igt_ppgtt_smoke_huge),
                SUBTEST(igt_ppgtt_sanity_check),
                SUBTEST(igt_ppgtt_compact),
+               SUBTEST(igt_ppgtt_mixed),
        };
 
        if (!HAS_PPGTT(i915)) {
index c6ad67b90e8af2657f6b0c61011fd30bdecaa7d8..d8864444432b778f23fb647d121a04d9dc629018 100644 (file)
@@ -179,97 +179,108 @@ out_file:
 }
 
 struct parallel_switch {
-       struct task_struct *tsk;
+       struct kthread_worker *worker;
+       struct kthread_work work;
        struct intel_context *ce[2];
+       int result;
 };
 
-static int __live_parallel_switch1(void *data)
+static void __live_parallel_switch1(struct kthread_work *work)
 {
-       struct parallel_switch *arg = data;
+       struct parallel_switch *arg =
+               container_of(work, typeof(*arg), work);
        IGT_TIMEOUT(end_time);
        unsigned long count;
 
        count = 0;
+       arg->result = 0;
        do {
                struct i915_request *rq = NULL;
-               int err, n;
+               int n;
 
-               err = 0;
-               for (n = 0; !err && n < ARRAY_SIZE(arg->ce); n++) {
+               for (n = 0; !arg->result && n < ARRAY_SIZE(arg->ce); n++) {
                        struct i915_request *prev = rq;
 
                        rq = i915_request_create(arg->ce[n]);
                        if (IS_ERR(rq)) {
                                i915_request_put(prev);
-                               return PTR_ERR(rq);
+                               arg->result = PTR_ERR(rq);
+                               break;
                        }
 
                        i915_request_get(rq);
                        if (prev) {
-                               err = i915_request_await_dma_fence(rq, &prev->fence);
+                               arg->result =
+                                       i915_request_await_dma_fence(rq,
+                                                                    &prev->fence);
                                i915_request_put(prev);
                        }
 
                        i915_request_add(rq);
                }
+
+               if (IS_ERR_OR_NULL(rq))
+                       break;
+
                if (i915_request_wait(rq, 0, HZ) < 0)
-                       err = -ETIME;
+                       arg->result = -ETIME;
+
                i915_request_put(rq);
-               if (err)
-                       return err;
 
                count++;
-       } while (!__igt_timeout(end_time, NULL));
+       } while (!arg->result && !__igt_timeout(end_time, NULL));
 
-       pr_info("%s: %lu switches (sync)\n", arg->ce[0]->engine->name, count);
-       return 0;
+       pr_info("%s: %lu switches (sync) <%d>\n",
+               arg->ce[0]->engine->name, count, arg->result);
 }
 
-static int __live_parallel_switchN(void *data)
+static void __live_parallel_switchN(struct kthread_work *work)
 {
-       struct parallel_switch *arg = data;
+       struct parallel_switch *arg =
+               container_of(work, typeof(*arg), work);
        struct i915_request *rq = NULL;
        IGT_TIMEOUT(end_time);
        unsigned long count;
        int n;
 
        count = 0;
+       arg->result = 0;
        do {
-               for (n = 0; n < ARRAY_SIZE(arg->ce); n++) {
+               for (n = 0; !arg->result && n < ARRAY_SIZE(arg->ce); n++) {
                        struct i915_request *prev = rq;
-                       int err = 0;
 
                        rq = i915_request_create(arg->ce[n]);
                        if (IS_ERR(rq)) {
                                i915_request_put(prev);
-                               return PTR_ERR(rq);
+                               arg->result = PTR_ERR(rq);
+                               break;
                        }
 
                        i915_request_get(rq);
                        if (prev) {
-                               err = i915_request_await_dma_fence(rq, &prev->fence);
+                               arg->result =
+                                       i915_request_await_dma_fence(rq,
+                                                                    &prev->fence);
                                i915_request_put(prev);
                        }
 
                        i915_request_add(rq);
-                       if (err) {
-                               i915_request_put(rq);
-                               return err;
-                       }
                }
 
                count++;
-       } while (!__igt_timeout(end_time, NULL));
-       i915_request_put(rq);
+       } while (!arg->result && !__igt_timeout(end_time, NULL));
 
-       pr_info("%s: %lu switches (many)\n", arg->ce[0]->engine->name, count);
-       return 0;
+       if (!IS_ERR_OR_NULL(rq))
+               i915_request_put(rq);
+
+       pr_info("%s: %lu switches (many) <%d>\n",
+               arg->ce[0]->engine->name, count, arg->result);
 }
 
 static int live_parallel_switch(void *arg)
 {
        struct drm_i915_private *i915 = arg;
-       static int (* const func[])(void *arg) = {
+       static void (* const func[])(struct kthread_work *) = {
                __live_parallel_switch1,
                __live_parallel_switchN,
                NULL,
@@ -277,7 +288,7 @@ static int live_parallel_switch(void *arg)
        struct parallel_switch *data = NULL;
        struct i915_gem_engines *engines;
        struct i915_gem_engines_iter it;
-       int (* const *fn)(void *arg);
+       void (* const *fn)(struct kthread_work *);
        struct i915_gem_context *ctx;
        struct intel_context *ce;
        struct file *file;
@@ -348,9 +359,22 @@ static int live_parallel_switch(void *arg)
                }
        }
 
+       for (n = 0; n < count; n++) {
+               struct kthread_worker *worker;
+
+               if (!data[n].ce[0])
+                       continue;
+
+               worker = kthread_create_worker(0, "igt/parallel:%s",
+                                              data[n].ce[0]->engine->name);
+               if (IS_ERR(worker))
+                       goto out;
+
+               data[n].worker = worker;
+       }
+
        for (fn = func; !err && *fn; fn++) {
                struct igt_live_test t;
-               int n;
 
                err = igt_live_test_begin(&t, i915, __func__, "");
                if (err)
@@ -360,30 +384,17 @@ static int live_parallel_switch(void *arg)
                        if (!data[n].ce[0])
                                continue;
 
-                       data[n].tsk = kthread_run(*fn, &data[n],
-                                                 "igt/parallel:%s",
-                                                 data[n].ce[0]->engine->name);
-                       if (IS_ERR(data[n].tsk)) {
-                               err = PTR_ERR(data[n].tsk);
-                               break;
-                       }
-                       get_task_struct(data[n].tsk);
+                       data[n].result = 0;
+                       kthread_init_work(&data[n].work, *fn);
+                       kthread_queue_work(data[n].worker, &data[n].work);
                }
 
-               yield(); /* start all threads before we kthread_stop() */
-
                for (n = 0; n < count; n++) {
-                       int status;
-
-                       if (IS_ERR_OR_NULL(data[n].tsk))
-                               continue;
-
-                       status = kthread_stop(data[n].tsk);
-                       if (status && !err)
-                               err = status;
-
-                       put_task_struct(data[n].tsk);
-                       data[n].tsk = NULL;
+                       if (data[n].ce[0]) {
+                               kthread_flush_work(&data[n].work);
+                               if (data[n].result && !err)
+                                       err = data[n].result;
+                       }
                }
 
                if (igt_live_test_end(&t))
@@ -399,6 +410,9 @@ out:
                        intel_context_unpin(data[n].ce[m]);
                        intel_context_put(data[n].ce[m]);
                }
+
+               if (data[n].worker)
+                       kthread_destroy_worker(data[n].worker);
        }
        kfree(data);
 out_file:
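The conversion above replaces kthread_run()/kthread_stop() with the kthread_worker API: a worker is created once per engine, a kthread_work is initialised and queued for each test phase, the result travels back through the embedding struct, and the workers are destroyed at the end. The bare lifecycle, reduced to a self-contained sketch with a placeholder work function:

    #include <linux/container_of.h>
    #include <linux/err.h>
    #include <linux/kthread.h>

    struct demo {
            struct kthread_work work;
            int result;
    };

    static void demo_fn(struct kthread_work *work)
    {
            struct demo *d = container_of(work, struct demo, work);

            d->result = 0;  /* status is reported through the embedding struct */
    }

    static int demo_run(void)
    {
            struct kthread_worker *worker;
            struct demo d = {};

            worker = kthread_create_worker(0, "igt/demo");
            if (IS_ERR(worker))
                    return PTR_ERR(worker);

            kthread_init_work(&d.work, demo_fn);
            kthread_queue_work(worker, &d.work);
            kthread_flush_work(&d.work);    /* wait for demo_fn() to complete */

            kthread_destroy_worker(worker);
            return d.result;
    }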
index f2f3cfad807ba5cb6f2fc3e601fbad65bbe98c07..e57f9390076c5567abcafa9810e15626d6f8a033 100644 (file)
@@ -6,8 +6,12 @@
 
 #include "i915_drv.h"
 #include "i915_selftest.h"
+#include "gem/i915_gem_context.h"
 
+#include "mock_context.h"
 #include "mock_dmabuf.h"
+#include "igt_gem_utils.h"
+#include "selftests/mock_drm.h"
 #include "selftests/mock_gem_device.h"
 
 static int igt_dmabuf_export(void *arg)
@@ -140,6 +144,75 @@ out_ret:
        return err;
 }
 
+static int verify_access(struct drm_i915_private *i915,
+                        struct drm_i915_gem_object *native_obj,
+                        struct drm_i915_gem_object *import_obj)
+{
+       struct i915_gem_engines_iter it;
+       struct i915_gem_context *ctx;
+       struct intel_context *ce;
+       struct i915_vma *vma;
+       struct file *file;
+       u32 *vaddr;
+       int err = 0, i;
+
+       file = mock_file(i915);
+       if (IS_ERR(file))
+               return PTR_ERR(file);
+
+       ctx = live_context(i915, file);
+       if (IS_ERR(ctx)) {
+               err = PTR_ERR(ctx);
+               goto out_file;
+       }
+
+       for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it) {
+               if (intel_engine_can_store_dword(ce->engine))
+                       break;
+       }
+       i915_gem_context_unlock_engines(ctx);
+       if (!ce)
+               goto out_file;
+
+       vma = i915_vma_instance(import_obj, ce->vm, NULL);
+       if (IS_ERR(vma)) {
+               err = PTR_ERR(vma);
+               goto out_file;
+       }
+
+       err = i915_vma_pin(vma, 0, 0, PIN_USER);
+       if (err)
+               goto out_file;
+
+       err = igt_gpu_fill_dw(ce, vma, 0,
+                             vma->size >> PAGE_SHIFT, 0xdeadbeaf);
+       i915_vma_unpin(vma);
+       if (err)
+               goto out_file;
+
+       err = i915_gem_object_wait(import_obj, 0, MAX_SCHEDULE_TIMEOUT);
+       if (err)
+               goto out_file;
+
+       vaddr = i915_gem_object_pin_map_unlocked(native_obj, I915_MAP_WB);
+       if (IS_ERR(vaddr)) {
+               err = PTR_ERR(vaddr);
+               goto out_file;
+       }
+
+       for (i = 0; i < native_obj->base.size / sizeof(u32); i += PAGE_SIZE / sizeof(u32)) {
+               if (vaddr[i] != 0xdeadbeaf) {
+                       pr_err("Data mismatch [%d]=%u\n", i, vaddr[i]);
+                       err = -EINVAL;
+                       goto out_file;
+               }
+       }
+
+out_file:
+       fput(file);
+       return err;
+}
+
 static int igt_dmabuf_import_same_driver(struct drm_i915_private *i915,
                                         struct intel_memory_region **regions,
                                         unsigned int num_regions)
@@ -154,7 +227,7 @@ static int igt_dmabuf_import_same_driver(struct drm_i915_private *i915,
 
        force_different_devices = true;
 
-       obj = __i915_gem_object_create_user(i915, PAGE_SIZE,
+       obj = __i915_gem_object_create_user(i915, SZ_8M,
                                            regions, num_regions);
        if (IS_ERR(obj)) {
                pr_err("__i915_gem_object_create_user failed with err=%ld\n",
@@ -206,6 +279,10 @@ static int igt_dmabuf_import_same_driver(struct drm_i915_private *i915,
 
        i915_gem_object_unlock(import_obj);
 
+       err = verify_access(i915, obj, import_obj);
+       if (err)
+               goto out_import;
+
        /* Now try to fake an importer */
        import_attach = dma_buf_attach(dmabuf, obj->base.dev->dev);
        if (IS_ERR(import_attach)) {
index b73c91aa5450d23677ae139a5bc029297de0f750..1cae24349a96fd7b525c2b951aee79116075a74e 100644 (file)
@@ -8,6 +8,7 @@
 #include <linux/prime_numbers.h>
 
 #include "gem/i915_gem_internal.h"
+#include "gem/i915_gem_lmem.h"
 #include "gem/i915_gem_region.h"
 #include "gem/i915_gem_ttm.h"
 #include "gem/i915_gem_ttm_move.h"
index e49fa6fa6aee1cc22f510c9d27a66f5f35618d6a..e1c76e5bfa82796432b74fd7c6637b8342a4ffcf 100644 (file)
@@ -396,15 +396,17 @@ int gen8_emit_init_breadcrumb(struct i915_request *rq)
        return 0;
 }
 
-static int __gen125_emit_bb_start(struct i915_request *rq,
-                                 u64 offset, u32 len,
-                                 const unsigned int flags,
-                                 u32 arb)
+static int __xehp_emit_bb_start(struct i915_request *rq,
+                               u64 offset, u32 len,
+                               const unsigned int flags,
+                               u32 arb)
 {
        struct intel_context *ce = rq->context;
        u32 wa_offset = lrc_indirect_bb(ce);
        u32 *cs;
 
+       GEM_BUG_ON(!ce->wa_bb_page);
+
        cs = intel_ring_begin(rq, 12);
        if (IS_ERR(cs))
                return PTR_ERR(cs);
@@ -435,18 +437,18 @@ static int __gen125_emit_bb_start(struct i915_request *rq,
        return 0;
 }
 
-int gen125_emit_bb_start_noarb(struct i915_request *rq,
-                              u64 offset, u32 len,
-                              const unsigned int flags)
+int xehp_emit_bb_start_noarb(struct i915_request *rq,
+                            u64 offset, u32 len,
+                            const unsigned int flags)
 {
-       return __gen125_emit_bb_start(rq, offset, len, flags, MI_ARB_DISABLE);
+       return __xehp_emit_bb_start(rq, offset, len, flags, MI_ARB_DISABLE);
 }
 
-int gen125_emit_bb_start(struct i915_request *rq,
-                        u64 offset, u32 len,
-                        const unsigned int flags)
+int xehp_emit_bb_start(struct i915_request *rq,
+                      u64 offset, u32 len,
+                      const unsigned int flags)
 {
-       return __gen125_emit_bb_start(rq, offset, len, flags, MI_ARB_ENABLE);
+       return __xehp_emit_bb_start(rq, offset, len, flags, MI_ARB_ENABLE);
 }
 
 int gen8_emit_bb_start_noarb(struct i915_request *rq,
@@ -583,6 +585,8 @@ u32 *gen8_emit_fini_breadcrumb_xcs(struct i915_request *rq, u32 *cs)
 u32 *gen8_emit_fini_breadcrumb_rcs(struct i915_request *rq, u32 *cs)
 {
        cs = gen8_emit_pipe_control(cs,
+                                   PIPE_CONTROL_CS_STALL |
+                                   PIPE_CONTROL_TLB_INVALIDATE |
                                    PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
                                    PIPE_CONTROL_DEPTH_CACHE_FLUSH |
                                    PIPE_CONTROL_DC_FLUSH_ENABLE,
@@ -600,15 +604,21 @@ u32 *gen8_emit_fini_breadcrumb_rcs(struct i915_request *rq, u32 *cs)
 
 u32 *gen11_emit_fini_breadcrumb_rcs(struct i915_request *rq, u32 *cs)
 {
+       cs = gen8_emit_pipe_control(cs,
+                                   PIPE_CONTROL_CS_STALL |
+                                   PIPE_CONTROL_TLB_INVALIDATE |
+                                   PIPE_CONTROL_TILE_CACHE_FLUSH |
+                                   PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
+                                   PIPE_CONTROL_DEPTH_CACHE_FLUSH |
+                                   PIPE_CONTROL_DC_FLUSH_ENABLE,
+                                   0);
+
+       /*XXX: Look at gen8_emit_fini_breadcrumb_rcs */
        cs = gen8_emit_ggtt_write_rcs(cs,
                                      rq->fence.seqno,
                                      hwsp_offset(rq),
-                                     PIPE_CONTROL_CS_STALL |
-                                     PIPE_CONTROL_TILE_CACHE_FLUSH |
-                                     PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
-                                     PIPE_CONTROL_DEPTH_CACHE_FLUSH |
-                                     PIPE_CONTROL_DC_FLUSH_ENABLE |
-                                     PIPE_CONTROL_FLUSH_ENABLE);
+                                     PIPE_CONTROL_FLUSH_ENABLE |
+                                     PIPE_CONTROL_CS_STALL);
 
        return gen8_emit_fini_breadcrumb_tail(rq, cs);
 }
@@ -715,6 +725,7 @@ u32 *gen12_emit_fini_breadcrumb_rcs(struct i915_request *rq, u32 *cs)
 {
        struct drm_i915_private *i915 = rq->engine->i915;
        u32 flags = (PIPE_CONTROL_CS_STALL |
+                    PIPE_CONTROL_TLB_INVALIDATE |
                     PIPE_CONTROL_TILE_CACHE_FLUSH |
                     PIPE_CONTROL_FLUSH_L3 |
                     PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
@@ -731,11 +742,15 @@ u32 *gen12_emit_fini_breadcrumb_rcs(struct i915_request *rq, u32 *cs)
        else if (rq->engine->class == COMPUTE_CLASS)
                flags &= ~PIPE_CONTROL_3D_ENGINE_FLAGS;
 
+       cs = gen12_emit_pipe_control(cs, PIPE_CONTROL0_HDC_PIPELINE_FLUSH, flags, 0);
+
+       /*XXX: Look at gen8_emit_fini_breadcrumb_rcs */
        cs = gen12_emit_ggtt_write_rcs(cs,
                                       rq->fence.seqno,
                                       hwsp_offset(rq),
-                                      PIPE_CONTROL0_HDC_PIPELINE_FLUSH,
-                                      flags);
+                                      0,
+                                      PIPE_CONTROL_FLUSH_ENABLE |
+                                      PIPE_CONTROL_CS_STALL);
 
        return gen12_emit_fini_breadcrumb_tail(rq, cs);
 }
index e4d24c811dd61dbbfdea8ced5bf6d272d0c341af..655e5c00ddc27751ef3f92d8020676ca6252d745 100644 (file)
@@ -32,12 +32,12 @@ int gen8_emit_bb_start(struct i915_request *rq,
                       u64 offset, u32 len,
                       const unsigned int flags);
 
-int gen125_emit_bb_start_noarb(struct i915_request *rq,
-                              u64 offset, u32 len,
-                              const unsigned int flags);
-int gen125_emit_bb_start(struct i915_request *rq,
-                        u64 offset, u32 len,
-                        const unsigned int flags);
+int xehp_emit_bb_start_noarb(struct i915_request *rq,
+                            u64 offset, u32 len,
+                            const unsigned int flags);
+int xehp_emit_bb_start(struct i915_request *rq,
+                      u64 offset, u32 len,
+                      const unsigned int flags);
 
 u32 *gen8_emit_fini_breadcrumb_xcs(struct i915_request *rq, u32 *cs);
 u32 *gen12_emit_fini_breadcrumb_xcs(struct i915_request *rq, u32 *cs);
index 2128b7a72a2575dce41f28279a6bb6f18abd7e7b..4daaa6f556688869b0cb788fd6609cda8b8ec5cd 100644 (file)
@@ -476,6 +476,7 @@ xehpsdv_ppgtt_insert_huge(struct i915_address_space *vm,
        const gen8_pte_t pte_encode = vm->pte_encode(0, cache_level, flags);
        unsigned int rem = sg_dma_len(iter->sg);
        u64 start = vma_res->start;
+       u64 end = start + vma_res->vma_size;
 
        GEM_BUG_ON(!i915_vm_is_4lvl(vm));
 
@@ -489,9 +490,10 @@ xehpsdv_ppgtt_insert_huge(struct i915_address_space *vm,
                gen8_pte_t encode = pte_encode;
                unsigned int page_size;
                gen8_pte_t *vaddr;
-               u16 index, max;
+               u16 index, max, nent, i;
 
                max = I915_PDES;
+               nent = 1;
 
                if (vma_res->bi.page_sizes.sg & I915_GTT_PAGE_SIZE_2M &&
                    IS_ALIGNED(iter->dma, I915_GTT_PAGE_SIZE_2M) &&
@@ -503,25 +505,37 @@ xehpsdv_ppgtt_insert_huge(struct i915_address_space *vm,
 
                        vaddr = px_vaddr(pd);
                } else {
-                       if (encode & GEN12_PPGTT_PTE_LM) {
-                               GEM_BUG_ON(__gen8_pte_index(start, 0) % 16);
-                               GEM_BUG_ON(rem < I915_GTT_PAGE_SIZE_64K);
-                               GEM_BUG_ON(!IS_ALIGNED(iter->dma,
-                                                      I915_GTT_PAGE_SIZE_64K));
-
-                               index = __gen8_pte_index(start, 0) / 16;
-                               page_size = I915_GTT_PAGE_SIZE_64K;
-
-                               max /= 16;
-
-                               vaddr = px_vaddr(pd);
-                               vaddr[__gen8_pte_index(start, 1)] |= GEN12_PDE_64K;
+                       index =  __gen8_pte_index(start, 0);
+                       page_size = I915_GTT_PAGE_SIZE;
 
-                               pt->is_compact = true;
-                       } else {
-                               GEM_BUG_ON(pt->is_compact);
-                               index =  __gen8_pte_index(start, 0);
-                               page_size = I915_GTT_PAGE_SIZE;
+                       if (vma_res->bi.page_sizes.sg & I915_GTT_PAGE_SIZE_64K) {
+                               /*
+                                * Device local-memory on these platforms should
+                                * always use 64K pages or larger (including GTT
+                                * alignment), therefore if we know the whole
+                                * page-table needs to be filled we can always
+                                * safely use the compact-layout. Otherwise fall
+                                * back to the TLB hint with PS64. If this is
+                                * system memory we only bother with PS64.
+                                */
+                               if ((encode & GEN12_PPGTT_PTE_LM) &&
+                                   end - start >= SZ_2M && !index) {
+                                       index = __gen8_pte_index(start, 0) / 16;
+                                       page_size = I915_GTT_PAGE_SIZE_64K;
+
+                                       max /= 16;
+
+                                       vaddr = px_vaddr(pd);
+                                       vaddr[__gen8_pte_index(start, 1)] |= GEN12_PDE_64K;
+
+                                       pt->is_compact = true;
+                               } else if (IS_ALIGNED(iter->dma, I915_GTT_PAGE_SIZE_64K) &&
+                                          rem >= I915_GTT_PAGE_SIZE_64K &&
+                                          !(index % 16)) {
+                                       encode |= GEN12_PTE_PS64;
+                                       page_size = I915_GTT_PAGE_SIZE_64K;
+                                       nent = 16;
+                               }
                        }
 
                        vaddr = px_vaddr(pt);
@@ -529,7 +543,12 @@ xehpsdv_ppgtt_insert_huge(struct i915_address_space *vm,
 
                do {
                        GEM_BUG_ON(rem < page_size);
-                       vaddr[index++] = encode | iter->dma;
+
+                       for (i = 0; i < nent; i++) {
+                               vaddr[index++] =
+                                       encode | (iter->dma + i *
+                                                 I915_GTT_PAGE_SIZE);
+                       }
 
                        start += page_size;
                        iter->dma += page_size;
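The rewritten branch boils down to two checks when inserting a 64K-backed VMA: use the compact 64K page-table layout only for local memory when a whole 2M worth of mappings starts at a page-table boundary; otherwise fall back to the PS64 TLB hint whenever a naturally aligned group of 16 PTE slots can be filled, writing 16 consecutive 4K entries per 64K chunk. A rough decision sketch (the helper names are illustrative, not the driver's):

    /* Compact layout: one PTE per 64K page, GEN12_PDE_64K set on the PDE. */
    static bool use_compact_64k(bool is_lmem, u64 start, u64 end, u16 index)
    {
            return is_lmem && end - start >= SZ_2M && index == 0;
    }

    /* PS64 hint: normal 4K layout, 16 PTEs written for each 64K chunk. */
    static bool use_ps64(dma_addr_t dma, unsigned int rem, u16 index)
    {
            return IS_ALIGNED(dma, I915_GTT_PAGE_SIZE_64K) &&
                   rem >= I915_GTT_PAGE_SIZE_64K && !(index % 16);
    }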
@@ -745,6 +764,8 @@ static void __xehpsdv_ppgtt_insert_entry_lm(struct i915_address_space *vm,
        GEM_BUG_ON(!IS_ALIGNED(addr, SZ_64K));
        GEM_BUG_ON(!IS_ALIGNED(offset, SZ_64K));
 
+       /* XXX: we don't strictly need to use this layout */
+
        if (!pt->is_compact) {
                vaddr = px_vaddr(pd);
                vaddr[gen8_pd_index(idx, 1)] |= GEN12_PDE_64K;
@@ -929,29 +950,18 @@ struct i915_ppgtt *gen8_ppgtt_create(struct intel_gt *gt,
         */
        ppgtt->vm.has_read_only = !IS_GRAPHICS_VER(gt->i915, 11, 12);
 
-       if (HAS_LMEM(gt->i915)) {
+       if (HAS_LMEM(gt->i915))
                ppgtt->vm.alloc_pt_dma = alloc_pt_lmem;
-
-               /*
-                * On some platforms the hw has dropped support for 4K GTT pages
-                * when dealing with LMEM, and due to the design of 64K GTT
-                * pages in the hw, we can only mark the *entire* page-table as
-                * operating in 64K GTT mode, since the enable bit is still on
-                * the pde, and not the pte. And since we still need to allow
-                * 4K GTT pages for SMEM objects, we can't have a "normal" 4K
-                * page-table with scratch pointing to LMEM, since that's
-                * undefined from the hw pov. The simplest solution is to just
-                * move the 64K scratch page to SMEM on such platforms and call
-                * it a day, since that should work for all configurations.
-                */
-               if (HAS_64K_PAGES(gt->i915))
-                       ppgtt->vm.alloc_scratch_dma = alloc_pt_dma;
-               else
-                       ppgtt->vm.alloc_scratch_dma = alloc_pt_lmem;
-       } else {
+       else
                ppgtt->vm.alloc_pt_dma = alloc_pt_dma;
-               ppgtt->vm.alloc_scratch_dma = alloc_pt_dma;
-       }
+
+       /*
+        * Using SMEM here instead of LMEM has the advantage of not reserving
+        * high performance memory for a "never" used filler page. It also
+        * removes the device access that would be required to initialise the
+        * scratch page, reducing pressure on an even scarcer resource.
+        */
+       ppgtt->vm.alloc_scratch_dma = alloc_pt_dma;
 
        ppgtt->vm.pte_encode = gen8_pte_encode;
 
index be09fb2e883a547d8e132d7f858d10ff7d5e6dd0..fb62b7b8cbcda6ef2bf5487c69a1ffd74ac99c34 100644 (file)
@@ -276,6 +276,14 @@ static inline bool intel_context_is_barrier(const struct intel_context *ce)
        return test_bit(CONTEXT_BARRIER_BIT, &ce->flags);
 }
 
+static inline void intel_context_close(struct intel_context *ce)
+{
+       set_bit(CONTEXT_CLOSED_BIT, &ce->flags);
+
+       if (ce->ops->close)
+               ce->ops->close(ce);
+}
+
 static inline bool intel_context_is_closed(const struct intel_context *ce)
 {
        return test_bit(CONTEXT_CLOSED_BIT, &ce->flags);
index 04eacae1aca55e4784c5d7f18d602f09af4eebe9..e36670f2e6260bfc894b500899f5a995bbc9e67c 100644 (file)
@@ -43,6 +43,8 @@ struct intel_context_ops {
        void (*revoke)(struct intel_context *ce, struct i915_request *rq,
                       unsigned int preempt_timeout_ms);
 
+       void (*close)(struct intel_context *ce);
+
        int (*pre_pin)(struct intel_context *ce, struct i915_gem_ww_ctx *ww, void **vaddr);
        int (*pin)(struct intel_context *ce, void *vaddr);
        void (*unpin)(struct intel_context *ce);
@@ -197,8 +199,6 @@ struct intel_context {
                 * context's submissions is complete.
                 */
                struct i915_sw_fence blocked;
-               /** @number_committed_requests: number of committed requests */
-               int number_committed_requests;
                /** @requests: list of active requests on this context */
                struct list_head requests;
                /** @prio: the context's current guc priority */
@@ -208,6 +208,11 @@ struct intel_context {
                 * each priority bucket
                 */
                u32 prio_count[GUC_CLIENT_PRIORITY_NUM];
+               /**
+                * @sched_disable_delay_work: worker to disable scheduling on this
+                * context
+                */
+               struct delayed_work sched_disable_delay_work;
        } guc_state;
 
        struct {
index 04e435bce79bdfc731b0f4bf834e3bd06381e2da..cbc8b857d5f7a0948897b12e154564395a16f282 100644 (file)
@@ -348,4 +348,10 @@ intel_engine_get_hung_context(struct intel_engine_cs *engine)
        return engine->hung_ce;
 }
 
+u64 intel_clamp_heartbeat_interval_ms(struct intel_engine_cs *engine, u64 value);
+u64 intel_clamp_max_busywait_duration_ns(struct intel_engine_cs *engine, u64 value);
+u64 intel_clamp_preempt_timeout_ms(struct intel_engine_cs *engine, u64 value);
+u64 intel_clamp_stop_timeout_ms(struct intel_engine_cs *engine, u64 value);
+u64 intel_clamp_timeslice_duration_ms(struct intel_engine_cs *engine, u64 value);
+
 #endif /* _INTEL_RINGBUFFER_H_ */
index 1f7188129cd1f6a1f15a91f15121d42a333f43ce..3b7d750ad0541043da6b45974992ee24c67282e7 100644 (file)
@@ -486,6 +486,17 @@ static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id,
        engine->logical_mask = BIT(logical_instance);
        __sprint_engine_name(engine);
 
+       if ((engine->class == COMPUTE_CLASS && !RCS_MASK(engine->gt) &&
+            __ffs(CCS_MASK(engine->gt)) == engine->instance) ||
+            engine->class == RENDER_CLASS)
+               engine->flags |= I915_ENGINE_FIRST_RENDER_COMPUTE;
+
+       /* features common between engines sharing EUs */
+       if (engine->class == RENDER_CLASS || engine->class == COMPUTE_CLASS) {
+               engine->flags |= I915_ENGINE_HAS_RCS_REG_STATE;
+               engine->flags |= I915_ENGINE_HAS_EU_PRIORITY;
+       }
+
        engine->props.heartbeat_interval_ms =
                CONFIG_DRM_I915_HEARTBEAT_INTERVAL;
        engine->props.max_busywait_duration_ns =
@@ -497,20 +508,34 @@ static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id,
        engine->props.timeslice_duration_ms =
                CONFIG_DRM_I915_TIMESLICE_DURATION;
 
-       /* Override to uninterruptible for OpenCL workloads. */
-       if (GRAPHICS_VER(i915) == 12 && engine->class == RENDER_CLASS)
-               engine->props.preempt_timeout_ms = 0;
-
-       if ((engine->class == COMPUTE_CLASS && !RCS_MASK(engine->gt) &&
-            __ffs(CCS_MASK(engine->gt)) == engine->instance) ||
-            engine->class == RENDER_CLASS)
-               engine->flags |= I915_ENGINE_FIRST_RENDER_COMPUTE;
-
-       /* features common between engines sharing EUs */
-       if (engine->class == RENDER_CLASS || engine->class == COMPUTE_CLASS) {
-               engine->flags |= I915_ENGINE_HAS_RCS_REG_STATE;
-               engine->flags |= I915_ENGINE_HAS_EU_PRIORITY;
-       }
+       /*
+        * Mid-thread pre-emption is not available in Gen12. Unfortunately,
+        * some compute workloads run quite long threads. That means they get
+        * reset due to not pre-empting in a timely manner. So, bump the
+        * pre-emption timeout value to be much higher for compute engines.
+        */
+       if (GRAPHICS_VER(i915) == 12 && (engine->flags & I915_ENGINE_HAS_RCS_REG_STATE))
+               engine->props.preempt_timeout_ms = CONFIG_DRM_I915_PREEMPT_TIMEOUT_COMPUTE;
+
+       /* Cap properties according to any system limits */
+#define CLAMP_PROP(field) \
+       do { \
+               u64 clamp = intel_clamp_##field(engine, engine->props.field); \
+               if (clamp != engine->props.field) { \
+                       drm_notice(&engine->i915->drm, \
+                                  "Warning, clamping %s to %lld to prevent overflow\n", \
+                                  #field, clamp); \
+                       engine->props.field = clamp; \
+               } \
+       } while (0)
+
+       CLAMP_PROP(heartbeat_interval_ms);
+       CLAMP_PROP(max_busywait_duration_ns);
+       CLAMP_PROP(preempt_timeout_ms);
+       CLAMP_PROP(stop_timeout_ms);
+       CLAMP_PROP(timeslice_duration_ms);
+
+#undef CLAMP_PROP
 
        engine->defaults = engine->props; /* never to change again */
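Expanded by hand for a single field, CLAMP_PROP(preempt_timeout_ms) above is equivalent to:

            do {
                    u64 clamp = intel_clamp_preempt_timeout_ms(engine,
                                            engine->props.preempt_timeout_ms);
                    if (clamp != engine->props.preempt_timeout_ms) {
                            drm_notice(&engine->i915->drm,
                                       "Warning, clamping %s to %lld to prevent overflow\n",
                                       "preempt_timeout_ms", clamp);
                            engine->props.preempt_timeout_ms = clamp;
                    }
            } while (0);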
 
@@ -534,6 +559,55 @@ static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id,
        return 0;
 }
 
+u64 intel_clamp_heartbeat_interval_ms(struct intel_engine_cs *engine, u64 value)
+{
+       value = min_t(u64, value, jiffies_to_msecs(MAX_SCHEDULE_TIMEOUT));
+
+       return value;
+}
+
+u64 intel_clamp_max_busywait_duration_ns(struct intel_engine_cs *engine, u64 value)
+{
+       value = min(value, jiffies_to_nsecs(2));
+
+       return value;
+}
+
+u64 intel_clamp_preempt_timeout_ms(struct intel_engine_cs *engine, u64 value)
+{
+       /*
+        * NB: The GuC API only supports 32bit values. However, the limit is further
+        * reduced due to internal calculations which would otherwise overflow.
+        */
+       if (intel_guc_submission_is_wanted(&engine->gt->uc.guc))
+               value = min_t(u64, value, guc_policy_max_preempt_timeout_ms());
+
+       value = min_t(u64, value, jiffies_to_msecs(MAX_SCHEDULE_TIMEOUT));
+
+       return value;
+}
+
+u64 intel_clamp_stop_timeout_ms(struct intel_engine_cs *engine, u64 value)
+{
+       value = min_t(u64, value, jiffies_to_msecs(MAX_SCHEDULE_TIMEOUT));
+
+       return value;
+}
+
+u64 intel_clamp_timeslice_duration_ms(struct intel_engine_cs *engine, u64 value)
+{
+       /*
+        * NB: The GuC API only supports 32bit values. However, the limit is further
+        * reduced due to internal calculations which would otherwise overflow.
+        */
+       if (intel_guc_submission_is_wanted(&engine->gt->uc.guc))
+               value = min_t(u64, value, guc_policy_max_exec_quantum_ms());
+
+       value = min_t(u64, value, jiffies_to_msecs(MAX_SCHEDULE_TIMEOUT));
+
+       return value;
+}
+
 static void __setup_engine_capabilities(struct intel_engine_cs *engine)
 {
        struct drm_i915_private *i915 = engine->i915;
@@ -1274,8 +1348,13 @@ int intel_engines_init(struct intel_gt *gt)
                        return err;
 
                err = setup(engine);
-               if (err)
+               if (err) {
+                       intel_engine_cleanup_common(engine);
                        return err;
+               }
+
+               /* The backend should now be responsible for cleanup */
+               GEM_BUG_ON(engine->release == NULL);
 
                err = engine_init_common(engine);
                if (err)
@@ -1554,11 +1633,11 @@ void intel_engine_get_instdone(const struct intel_engine_cs *engine,
                for_each_ss_steering(iter, engine->gt, slice, subslice) {
                        instdone->sampler[slice][subslice] =
                                intel_gt_mcr_read(engine->gt,
-                                                 GEN7_SAMPLER_INSTDONE,
+                                                 GEN8_SAMPLER_INSTDONE,
                                                  slice, subslice);
                        instdone->row[slice][subslice] =
                                intel_gt_mcr_read(engine->gt,
-                                                 GEN7_ROW_INSTDONE,
+                                                 GEN8_ROW_INSTDONE,
                                                  slice, subslice);
                }
 
index a3698f611f4577032ceab0dc2876a818018bd045..9a527e1f5be6552a5dd67ff0938f3128a4bae562 100644 (file)
 
 static bool next_heartbeat(struct intel_engine_cs *engine)
 {
+       struct i915_request *rq;
        long delay;
 
        delay = READ_ONCE(engine->props.heartbeat_interval_ms);
+
+       rq = engine->heartbeat.systole;
+
+       /*
+        * FIXME: The final period extension is disabled if the period has been
+        * modified from the default. This is to prevent issues with certain
+        * selftests which override the value and expect specific behaviour.
+        * Once the selftests have been updated to either cope with variable
+        * heartbeat periods (or to override the pre-emption timeout as well,
+        * or just to add a selftest specific override of the extension), the
+        * generic override can be removed.
+        */
+       if (rq && rq->sched.attr.priority >= I915_PRIORITY_BARRIER &&
+           delay == engine->defaults.heartbeat_interval_ms) {
+               long longer;
+
+               /*
+                * The final try is at the highest priority possible. Up until now
+                * a pre-emption might not even have been attempted. So make sure
+                * this last attempt allows enough time for a pre-emption to occur.
+                */
+               longer = READ_ONCE(engine->props.preempt_timeout_ms) * 2;
+               longer = intel_clamp_heartbeat_interval_ms(engine, longer);
+               if (longer > delay)
+                       delay = longer;
+       }
+
        if (!delay)
                return false;
 
@@ -288,6 +316,17 @@ int intel_engine_set_heartbeat(struct intel_engine_cs *engine,
        if (!delay && !intel_engine_has_preempt_reset(engine))
                return -ENODEV;
 
+       /* FIXME: Remove together with equally marked hack in next_heartbeat. */
+       if (delay != engine->defaults.heartbeat_interval_ms &&
+           delay < 2 * engine->props.preempt_timeout_ms) {
+               if (intel_engine_uses_guc(engine))
+                       drm_notice(&engine->i915->drm, "%s heartbeat interval adjusted to a non-default value which may downgrade individual engine resets to full GPU resets!\n",
+                                  engine->name);
+               else
+                       drm_notice(&engine->i915->drm, "%s heartbeat interval adjusted to a non-default value which may cause engine resets to target innocent contexts!\n",
+                                  engine->name);
+       }
+
        intel_engine_pm_get(engine);
 
        err = mutex_lock_interruptible(&ce->timeline->mutex);
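Taken together, the two hunks above mean the final, barrier-priority heartbeat period is stretched to at least twice the pre-emption timeout (as long as the interval is still at its default), and intel_engine_set_heartbeat() now warns when a user-chosen interval is shorter than that. With made-up numbers (not the driver defaults):

            long delay = 500;                       /* heartbeat_interval_ms */
            long preempt_timeout = 640;             /* preempt_timeout_ms */
            long longer = 2 * preempt_timeout;      /* 1280 */

            if (longer > delay)
                    delay = longer; /* final heartbeat waits 1280 ms, not 500 */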
index fe1a0d5fd4b1ad439cb4f6b0ecc1c4ba424c25a3..ee3efd06ee54bcd434dcad2493d5d3113cb469c6 100644 (file)
 #define RING_CONTEXT_STATUS_PTR(base)          _MMIO((base) + 0x3a0)
 #define RING_CTX_TIMESTAMP(base)               _MMIO((base) + 0x3a8) /* gen8+ */
 #define RING_PREDICATE_RESULT(base)            _MMIO((base) + 0x3b8)
+#define MI_PREDICATE_RESULT_2_ENGINE(base)     _MMIO((base) + 0x3bc)
 #define RING_FORCE_TO_NONPRIV(base, i)         _MMIO(((base) + 0x4D0) + (i) * 4)
 #define   RING_FORCE_TO_NONPRIV_DENY           REG_BIT(30)
 #define   RING_FORCE_TO_NONPRIV_ADDRESS_MASK   REG_GENMASK(25, 2)
index c718e6dc40b5158c0f4fdd89d27deddd02dbef3d..0187bc72310d6f8fa4cb7fa7492dc8b4638ee5d9 100644 (file)
@@ -3471,9 +3471,9 @@ logical_ring_default_vfuncs(struct intel_engine_cs *engine)
 
        if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 50)) {
                if (intel_engine_has_preemption(engine))
-                       engine->emit_bb_start = gen125_emit_bb_start;
+                       engine->emit_bb_start = xehp_emit_bb_start;
                else
-                       engine->emit_bb_start = gen125_emit_bb_start_noarb;
+                       engine->emit_bb_start = xehp_emit_bb_start_noarb;
        } else {
                if (intel_engine_has_preemption(engine))
                        engine->emit_bb_start = gen8_emit_bb_start;
index 2049a00417afad506b0c92b55bcc8c5a8185b521..2518cebbf931c78e9ed53be91fb855f3c60d0446 100644 (file)
@@ -871,8 +871,8 @@ static int ggtt_probe_common(struct i915_ggtt *ggtt, u64 size)
        u32 pte_flags;
        int ret;
 
-       GEM_WARN_ON(pci_resource_len(pdev, GTTMMADR_BAR) != gen6_gttmmadr_size(i915));
-       phys_addr = pci_resource_start(pdev, GTTMMADR_BAR) + gen6_gttadr_offset(i915);
+       GEM_WARN_ON(pci_resource_len(pdev, GEN4_GTTMMADR_BAR) != gen6_gttmmadr_size(i915));
+       phys_addr = pci_resource_start(pdev, GEN4_GTTMMADR_BAR) + gen6_gttadr_offset(i915);
 
        /*
         * On BXT+/ICL+ writes larger than 64 bit to the GTT pagetable range
@@ -931,11 +931,11 @@ static int gen8_gmch_probe(struct i915_ggtt *ggtt)
        unsigned int size;
        u16 snb_gmch_ctl;
 
-       if (!HAS_LMEM(i915)) {
-               if (!i915_pci_resource_valid(pdev, GTT_APERTURE_BAR))
+       if (!HAS_LMEM(i915) && !HAS_LMEMBAR_SMEM_STOLEN(i915)) {
+               if (!i915_pci_resource_valid(pdev, GEN4_GMADR_BAR))
                        return -ENXIO;
 
-               ggtt->gmadr = pci_resource(pdev, GTT_APERTURE_BAR);
+               ggtt->gmadr = pci_resource(pdev, GEN4_GMADR_BAR);
                ggtt->mappable_end = resource_size(&ggtt->gmadr);
        }
 
@@ -986,7 +986,7 @@ static int gen8_gmch_probe(struct i915_ggtt *ggtt)
 
        ggtt->vm.pte_encode = gen8_ggtt_pte_encode;
 
-       setup_private_pat(ggtt->vm.gt->uncore);
+       setup_private_pat(ggtt->vm.gt);
 
        return ggtt_probe_common(ggtt, size);
 }
@@ -1089,10 +1089,10 @@ static int gen6_gmch_probe(struct i915_ggtt *ggtt)
        unsigned int size;
        u16 snb_gmch_ctl;
 
-       if (!i915_pci_resource_valid(pdev, GTT_APERTURE_BAR))
+       if (!i915_pci_resource_valid(pdev, GEN4_GMADR_BAR))
                return -ENXIO;
 
-       ggtt->gmadr = pci_resource(pdev, GTT_APERTURE_BAR);
+       ggtt->gmadr = pci_resource(pdev, GEN4_GMADR_BAR);
        ggtt->mappable_end = resource_size(&ggtt->gmadr);
 
        /*
@@ -1308,7 +1308,7 @@ void i915_ggtt_resume(struct i915_ggtt *ggtt)
                wbinvd_on_all_cpus();
 
        if (GRAPHICS_VER(ggtt->vm.i915) >= 8)
-               setup_private_pat(ggtt->vm.gt->uncore);
+               setup_private_pat(ggtt->vm.gt);
 
        intel_ggtt_restore_fences(ggtt);
 }
index d4e9702d3c8e7f06c007bc8cfb031672aa81c9bb..f50ea92910d9728b5f4137f08b19ada0cd8b4dea 100644 (file)
 #define   MI_BATCH_RESOURCE_STREAMER REG_BIT(10)
 #define   MI_BATCH_PREDICATE         REG_BIT(15) /* HSW+ on RCS only*/
 
+#define MI_OPCODE(x)           (((x) >> 23) & 0x3f)
+#define IS_MI_LRI_CMD(x)       (MI_OPCODE(x) == MI_OPCODE(MI_INSTR(0x22, 0)))
+#define MI_LRI_LEN(x)          (((x) & 0xff) + 1)
+
 /*
  * 3D instructions used by the kernel
  */
index 7af6db3194ddbf6bd8c77c47bbd6c795b35fab0e..976fdf27e79097ae1ac8dec4599b96bd0ac6db2c 100644 (file)
@@ -7,6 +7,7 @@
 #include <linux/mei_aux.h>
 #include "i915_drv.h"
 #include "i915_reg.h"
+#include "gem/i915_gem_lmem.h"
 #include "gem/i915_gem_region.h"
 #include "gt/intel_gsc.h"
 #include "gt/intel_gt.h"
@@ -142,8 +143,14 @@ static void gsc_destroy_one(struct drm_i915_private *i915,
        struct intel_gsc_intf *intf = &gsc->intf[intf_id];
 
        if (intf->adev) {
-               auxiliary_device_delete(&intf->adev->aux_dev);
-               auxiliary_device_uninit(&intf->adev->aux_dev);
+               struct auxiliary_device *aux_dev = &intf->adev->aux_dev;
+
+               if (intf_id == 0)
+                       intel_huc_unregister_gsc_notifier(&gsc_to_gt(gsc)->uc.huc,
+                                                         aux_dev->dev.bus);
+
+               auxiliary_device_delete(aux_dev);
+               auxiliary_device_uninit(aux_dev);
                intf->adev = NULL;
        }
 
@@ -242,14 +249,24 @@ add_device:
                goto fail;
        }
 
+       intf->adev = adev; /* needed by the notifier */
+
+       if (intf_id == 0)
+               intel_huc_register_gsc_notifier(&gsc_to_gt(gsc)->uc.huc,
+                                               aux_dev->dev.bus);
+
        ret = auxiliary_device_add(aux_dev);
        if (ret < 0) {
                drm_err(&i915->drm, "gsc aux add failed %d\n", ret);
+               if (intf_id == 0)
+                       intel_huc_unregister_gsc_notifier(&gsc_to_gt(gsc)->uc.huc,
+                                                         aux_dev->dev.bus);
+               intf->adev = NULL;
+
                /* adev will be freed with the put_device() and .release sequence */
                auxiliary_device_uninit(aux_dev);
                goto fail;
        }
-       intf->adev = adev;
 
        return;
 fail:
index d0b03a928b9acaaae274907fa73e4431186470e6..8e914c4066ed5b408979555cb6e21936b1af7968 100644 (file)
@@ -40,8 +40,6 @@ void intel_gt_common_init_early(struct intel_gt *gt)
 {
        spin_lock_init(gt->irq_lock);
 
-       INIT_LIST_HEAD(&gt->lmem_userfault_list);
-       mutex_init(&gt->lmem_userfault_lock);
        INIT_LIST_HEAD(&gt->closed_vma);
        spin_lock_init(&gt->closed_lock);
 
@@ -231,6 +229,16 @@ static void gen6_clear_engine_error_register(struct intel_engine_cs *engine)
        GEN6_RING_FAULT_REG_POSTING_READ(engine);
 }
 
+i915_reg_t intel_gt_perf_limit_reasons_reg(struct intel_gt *gt)
+{
+       /* GT0_PERF_LIMIT_REASONS is available only for Gen11+ */
+       if (GRAPHICS_VER(gt->i915) < 11)
+               return INVALID_MMIO_REG;
+
+       return gt->type == GT_MEDIA ?
+               MTL_MEDIA_PERF_LIMIT_REASONS : GT0_PERF_LIMIT_REASONS;
+}
+
 void
 intel_gt_clear_error_registers(struct intel_gt *gt,
                               intel_engine_mask_t engine_mask)
@@ -260,7 +268,11 @@ intel_gt_clear_error_registers(struct intel_gt *gt,
                                   I915_MASTER_ERROR_INTERRUPT);
        }
 
-       if (GRAPHICS_VER(i915) >= 12) {
+       if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50)) {
+               intel_gt_mcr_multicast_rmw(gt, XEHP_RING_FAULT_REG,
+                                          RING_FAULT_VALID, 0);
+               intel_gt_mcr_read_any(gt, XEHP_RING_FAULT_REG);
+       } else if (GRAPHICS_VER(i915) >= 12) {
                rmw_clear(uncore, GEN12_RING_FAULT_REG, RING_FAULT_VALID);
                intel_uncore_posting_read(uncore, GEN12_RING_FAULT_REG);
        } else if (GRAPHICS_VER(i915) >= 8) {
@@ -298,6 +310,42 @@ static void gen6_check_faults(struct intel_gt *gt)
        }
 }
 
+static void xehp_check_faults(struct intel_gt *gt)
+{
+       u32 fault;
+
+       /*
+        * Although the fault register now lives in an MCR register range,
+        * the GAM registers are special and we only truly need to read
+        * the "primary" GAM instance rather than handling each instance
+        * individually.  intel_gt_mcr_read_any() will automatically steer
+        * toward the primary instance.
+        */
+       fault = intel_gt_mcr_read_any(gt, XEHP_RING_FAULT_REG);
+       if (fault & RING_FAULT_VALID) {
+               u32 fault_data0, fault_data1;
+               u64 fault_addr;
+
+               fault_data0 = intel_gt_mcr_read_any(gt, XEHP_FAULT_TLB_DATA0);
+               fault_data1 = intel_gt_mcr_read_any(gt, XEHP_FAULT_TLB_DATA1);
+
+               fault_addr = ((u64)(fault_data1 & FAULT_VA_HIGH_BITS) << 44) |
+                            ((u64)fault_data0 << 12);
+
+               drm_dbg(&gt->i915->drm, "Unexpected fault\n"
+                       "\tAddr: 0x%08x_%08x\n"
+                       "\tAddress space: %s\n"
+                       "\tEngine ID: %d\n"
+                       "\tSource ID: %d\n"
+                       "\tType: %d\n",
+                       upper_32_bits(fault_addr), lower_32_bits(fault_addr),
+                       fault_data1 & FAULT_GTT_SEL ? "GGTT" : "PPGTT",
+                       GEN8_RING_FAULT_ENGINE_ID(fault),
+                       RING_FAULT_SRCID(fault),
+                       RING_FAULT_FAULT_TYPE(fault));
+       }
+}
+
 static void gen8_check_faults(struct intel_gt *gt)
 {
        struct intel_uncore *uncore = gt->uncore;
@@ -344,7 +392,9 @@ void intel_gt_check_and_clear_faults(struct intel_gt *gt)
        struct drm_i915_private *i915 = gt->i915;
 
        /* From GEN8 onwards we only have one 'All Engine Fault Register' */
-       if (GRAPHICS_VER(i915) >= 8)
+       if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50))
+               xehp_check_faults(gt);
+       else if (GRAPHICS_VER(i915) >= 8)
                gen8_check_faults(gt);
        else if (GRAPHICS_VER(i915) >= 6)
                gen6_check_faults(gt);
@@ -807,7 +857,6 @@ static int intel_gt_tile_setup(struct intel_gt *gt, phys_addr_t phys_addr)
        }
 
        intel_uncore_init_early(gt->uncore, gt);
-       intel_wakeref_auto_init(&gt->userfault_wakeref, gt->uncore->rpm);
 
        ret = intel_uncore_setup_mmio(gt->uncore, phys_addr);
        if (ret)
@@ -828,7 +877,7 @@ int intel_gt_probe_all(struct drm_i915_private *i915)
        unsigned int i;
        int ret;
 
-       mmio_bar = GRAPHICS_VER(i915) == 2 ? GEN2_GTTMMADR_BAR : GTTMMADR_BAR;
+       mmio_bar = intel_mmio_bar(GRAPHICS_VER(i915));
        phys_addr = pci_resource_start(pdev, mmio_bar);
 
        /*
@@ -939,7 +988,10 @@ void intel_gt_info_print(const struct intel_gt_info *info,
 }
 
 struct reg_and_bit {
-       i915_reg_t reg;
+       union {
+               i915_reg_t reg;
+               i915_mcr_reg_t mcr_reg;
+       };
        u32 bit;
 };
 
@@ -965,6 +1017,32 @@ get_reg_and_bit(const struct intel_engine_cs *engine, const bool gen8,
        return rb;
 }
 
+/*
+ * HW architecture suggest typical invalidation time at 40us,
+ * with pessimistic cases up to 100us and a recommendation to
+ * cap at 1ms. We go a bit higher just in case.
+ */
+#define TLB_INVAL_TIMEOUT_US 100
+#define TLB_INVAL_TIMEOUT_MS 4
+
+/*
+ * On Xe_HP the TLB invalidation registers are located at the same MMIO offsets
+ * but are now considered MCR registers.  Since they exist within a GAM range,
+ * the primary instance of the register rolls up the status from each unit.
+ */
+static int wait_for_invalidate(struct intel_gt *gt, struct reg_and_bit rb)
+{
+       if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 50))
+               return intel_gt_mcr_wait_for_reg_fw(gt, rb.mcr_reg, rb.bit, 0,
+                                                   TLB_INVAL_TIMEOUT_US,
+                                                   TLB_INVAL_TIMEOUT_MS);
+       else
+               return __intel_wait_for_register_fw(gt->uncore, rb.reg, rb.bit, 0,
+                                                   TLB_INVAL_TIMEOUT_US,
+                                                   TLB_INVAL_TIMEOUT_MS,
+                                                   NULL);
+}
+
 static void mmio_invalidate_full(struct intel_gt *gt)
 {
        static const i915_reg_t gen8_regs[] = {
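The timeouts and wait_for_invalidate() helper above keep the per-engine invalidation as a write-then-poll sequence; only the register type and the wait helper differ between the legacy and Xe_HP paths. Condensed for the legacy path, where rb is the reg/bit pair picked by get_reg_and_bit():

            intel_uncore_write_fw(uncore, rb.reg, rb.bit);

            if (__intel_wait_for_register_fw(uncore, rb.reg, rb.bit, 0,
                                             TLB_INVAL_TIMEOUT_US,
                                             TLB_INVAL_TIMEOUT_MS, NULL))
                    drm_err_ratelimited(&gt->i915->drm,
                                        "%s TLB invalidation did not complete in %ums!\n",
                                        engine->name, TLB_INVAL_TIMEOUT_MS);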
@@ -980,6 +1058,13 @@ static void mmio_invalidate_full(struct intel_gt *gt)
                [COPY_ENGINE_CLASS]             = GEN12_BLT_TLB_INV_CR,
                [COMPUTE_CLASS]                 = GEN12_COMPCTX_TLB_INV_CR,
        };
+       static const i915_mcr_reg_t xehp_regs[] = {
+               [RENDER_CLASS]                  = XEHP_GFX_TLB_INV_CR,
+               [VIDEO_DECODE_CLASS]            = XEHP_VD_TLB_INV_CR,
+               [VIDEO_ENHANCEMENT_CLASS]       = XEHP_VE_TLB_INV_CR,
+               [COPY_ENGINE_CLASS]             = XEHP_BLT_TLB_INV_CR,
+               [COMPUTE_CLASS]                 = XEHP_COMPCTX_TLB_INV_CR,
+       };
        struct drm_i915_private *i915 = gt->i915;
        struct intel_uncore *uncore = gt->uncore;
        struct intel_engine_cs *engine;
@@ -988,7 +1073,10 @@ static void mmio_invalidate_full(struct intel_gt *gt)
        const i915_reg_t *regs;
        unsigned int num = 0;
 
-       if (GRAPHICS_VER(i915) == 12) {
+       if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50)) {
+               regs = NULL;
+               num = ARRAY_SIZE(xehp_regs);
+       } else if (GRAPHICS_VER(i915) == 12) {
                regs = gen12_regs;
                num = ARRAY_SIZE(gen12_regs);
        } else if (GRAPHICS_VER(i915) >= 8 && GRAPHICS_VER(i915) <= 11) {
@@ -1013,11 +1101,17 @@ static void mmio_invalidate_full(struct intel_gt *gt)
                if (!intel_engine_pm_is_awake(engine))
                        continue;
 
-               rb = get_reg_and_bit(engine, regs == gen8_regs, regs, num);
-               if (!i915_mmio_reg_offset(rb.reg))
-                       continue;
+               if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50)) {
+                       intel_gt_mcr_multicast_write_fw(gt,
+                                                       xehp_regs[engine->class],
+                                                       BIT(engine->instance));
+               } else {
+                       rb = get_reg_and_bit(engine, regs == gen8_regs, regs, num);
+                       if (!i915_mmio_reg_offset(rb.reg))
+                               continue;
 
-               intel_uncore_write_fw(uncore, rb.reg, rb.bit);
+                       intel_uncore_write_fw(uncore, rb.reg, rb.bit);
+               }
                awake |= engine->mask;
        }
 
@@ -1037,22 +1131,17 @@ static void mmio_invalidate_full(struct intel_gt *gt)
        for_each_engine_masked(engine, gt, awake, tmp) {
                struct reg_and_bit rb;
 
-               /*
-                * HW architecture suggest typical invalidation time at 40us,
-                * with pessimistic cases up to 100us and a recommendation to
-                * cap at 1ms. We go a bit higher just in case.
-                */
-               const unsigned int timeout_us = 100;
-               const unsigned int timeout_ms = 4;
-
-               rb = get_reg_and_bit(engine, regs == gen8_regs, regs, num);
-               if (__intel_wait_for_register_fw(uncore,
-                                                rb.reg, rb.bit, 0,
-                                                timeout_us, timeout_ms,
-                                                NULL))
+               if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50)) {
+                       rb.mcr_reg = xehp_regs[engine->class];
+                       rb.bit = BIT(engine->instance);
+               } else {
+                       rb = get_reg_and_bit(engine, regs == gen8_regs, regs, num);
+               }
+
+               if (wait_for_invalidate(gt, rb))
                        drm_err_ratelimited(&gt->i915->drm,
                                            "%s TLB invalidation did not complete in %ums!\n",
-                                           engine->name, timeout_ms);
+                                           engine->name, TLB_INVAL_TIMEOUT_MS);
        }
 
        /*
index 2ee582e287c8dd7d698d695fd96a77f9f67ad3ee..e0365d55624846c09fa024be3d0f4ba72b768aea 100644 (file)
@@ -60,6 +60,7 @@ void intel_gt_driver_late_release_all(struct drm_i915_private *i915);
 int intel_gt_wait_for_idle(struct intel_gt *gt, long timeout);
 
 void intel_gt_check_and_clear_faults(struct intel_gt *gt);
+i915_reg_t intel_gt_perf_limit_reasons_reg(struct intel_gt *gt);
 void intel_gt_clear_error_registers(struct intel_gt *gt,
                                    intel_engine_mask_t engine_mask);
 
index 3f656d3dba9a8c8b82128ad47dea3f052abadba9..2a6a4ca7fdad6de759b5abb5adf78d62a1a5baa7 100644 (file)
@@ -107,7 +107,7 @@ static u32 gen9_read_clock_frequency(struct intel_uncore *uncore)
        return freq;
 }
 
-static u32 gen5_read_clock_frequency(struct intel_uncore *uncore)
+static u32 gen6_read_clock_frequency(struct intel_uncore *uncore)
 {
        /*
         * PRMs say:
@@ -119,7 +119,27 @@ static u32 gen5_read_clock_frequency(struct intel_uncore *uncore)
        return 12500000;
 }
 
-static u32 gen2_read_clock_frequency(struct intel_uncore *uncore)
+static u32 gen5_read_clock_frequency(struct intel_uncore *uncore)
+{
+       /*
+        * 63:32 increments every 1000 ns
+        * 31:0 mbz
+        */
+       return 1000000000 / 1000;
+}
+
+static u32 g4x_read_clock_frequency(struct intel_uncore *uncore)
+{
+       /*
+        * 63:20 increments every 1/4 ns
+        * 19:0 mbz
+        *
+        * -> 63:32 increments every 1024 ns
+        */
+       return 1000000000 / 1024;
+}
+
+static u32 gen4_read_clock_frequency(struct intel_uncore *uncore)
 {
        /*
         * PRMs say:
@@ -127,8 +147,10 @@ static u32 gen2_read_clock_frequency(struct intel_uncore *uncore)
         *     "The value in this register increments once every 16
         *      hclks." (through the “Clocking Configuration”
         *      (“CLKCFG”) MCHBAR register)
+        *
+        * Testing on actual hardware has shown there is no /16.
         */
-       return RUNTIME_INFO(uncore->i915)->rawclk_freq * 1000 / 16;
+       return RUNTIME_INFO(uncore->i915)->rawclk_freq * 1000;
 }
 
 static u32 read_clock_frequency(struct intel_uncore *uncore)
@@ -137,10 +159,16 @@ static u32 read_clock_frequency(struct intel_uncore *uncore)
                return gen11_read_clock_frequency(uncore);
        else if (GRAPHICS_VER(uncore->i915) >= 9)
                return gen9_read_clock_frequency(uncore);
-       else if (GRAPHICS_VER(uncore->i915) >= 5)
+       else if (GRAPHICS_VER(uncore->i915) >= 6)
+               return gen6_read_clock_frequency(uncore);
+       else if (GRAPHICS_VER(uncore->i915) == 5)
                return gen5_read_clock_frequency(uncore);
+       else if (IS_G4X(uncore->i915))
+               return g4x_read_clock_frequency(uncore);
+       else if (GRAPHICS_VER(uncore->i915) == 4)
+               return gen4_read_clock_frequency(uncore);
        else
-               return gen2_read_clock_frequency(uncore);
+               return 0;
 }
 
 void intel_gt_init_clock_frequency(struct intel_gt *gt)
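As a worked example of the frequencies derived above (standalone C, not kernel code): once read_clock_frequency() has settled on a timestamp clock in Hz, converting a CS timestamp count into nanoseconds is a plain ticks * NSEC_PER_SEC / freq, roughly what intel_gt_clock_interval_to_ns() computes from gt->clock_frequency in the driver. The raw clock below is an assumed example value standing in for RUNTIME_INFO(i915)->rawclk_freq (in kHz).

#include <stdint.h>
#include <stdio.h>

#define NSEC_PER_SEC 1000000000ull

/* ns = ticks * 1e9 / freq_hz */
static uint64_t ticks_to_ns(uint64_t ticks, uint32_t freq_hz)
{
	return ticks * NSEC_PER_SEC / freq_hz;
}

int main(void)
{
	uint32_t gen6_hz = 12500000;               /* 12.5 MHz, per the PRM note above */
	uint32_t gen5_hz = 1000000000 / 1000;      /* bits 63:32 tick every 1000 ns */
	uint32_t g4x_hz  = 1000000000 / 1024;      /* bits 63:32 tick every 1024 ns */
	uint32_t rawclk_khz = 125000;              /* assumed example raw clock */
	uint32_t gen4_hz = rawclk_khz * 1000;      /* no /16, per the comment above */

	printf("gen6: 1000 ticks = %llu ns\n", (unsigned long long)ticks_to_ns(1000, gen6_hz));
	printf("gen5: 1000 ticks = %llu ns\n", (unsigned long long)ticks_to_ns(1000, gen5_hz));
	printf("g4x:  1000 ticks = %llu ns\n", (unsigned long long)ticks_to_ns(1000, g4x_hz));
	printf("gen4: 1000 ticks = %llu ns\n", (unsigned long long)ticks_to_ns(1000, gen4_hz));
	return 0;
}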
index e79405a45312284adb2dd9fb6a9a8bff908ac93f..830edffe88ccef5ff249abb940ab6a7898396489 100644 (file)
@@ -40,6 +40,9 @@ static const char * const intel_steering_types[] = {
        "L3BANK",
        "MSLICE",
        "LNCF",
+       "GAM",
+       "DSS",
+       "OADDRM",
        "INSTANCE 0",
 };
 
@@ -48,14 +51,23 @@ static const struct intel_mmio_range icl_l3bank_steering_table[] = {
        {},
 };
 
+/*
+ * Although the bspec lists more "MSLICE" ranges than shown here, some of those
+ * are of a "GAM" subclass that has special rules.  Thus we use a separate
+ * GAM table farther down for those.
+ */
 static const struct intel_mmio_range xehpsdv_mslice_steering_table[] = {
-       { 0x004000, 0x004AFF },
-       { 0x00C800, 0x00CFFF },
        { 0x00DD00, 0x00DDFF },
        { 0x00E900, 0x00FFFF }, /* 0xEA00 - 0xEFFF is unused */
        {},
 };
 
+static const struct intel_mmio_range xehpsdv_gam_steering_table[] = {
+       { 0x004000, 0x004AFF },
+       { 0x00C800, 0x00CFFF },
+       {},
+};
+
 static const struct intel_mmio_range xehpsdv_lncf_steering_table[] = {
        { 0x00B000, 0x00B0FF },
        { 0x00D800, 0x00D8FF },
@@ -89,9 +101,47 @@ static const struct intel_mmio_range pvc_instance0_steering_table[] = {
        {},
 };
 
+static const struct intel_mmio_range xelpg_instance0_steering_table[] = {
+       { 0x000B00, 0x000BFF },         /* SQIDI */
+       { 0x001000, 0x001FFF },         /* SQIDI */
+       { 0x004000, 0x0048FF },         /* GAM */
+       { 0x008700, 0x0087FF },         /* SQIDI */
+       { 0x00B000, 0x00B0FF },         /* NODE */
+       { 0x00C800, 0x00CFFF },         /* GAM */
+       { 0x00D880, 0x00D8FF },         /* NODE */
+       { 0x00DD00, 0x00DDFF },         /* OAAL2 */
+       {},
+};
+
+static const struct intel_mmio_range xelpg_l3bank_steering_table[] = {
+       { 0x00B100, 0x00B3FF },
+       {},
+};
+
+/* DSS steering is used for SLICE ranges as well */
+static const struct intel_mmio_range xelpg_dss_steering_table[] = {
+       { 0x005200, 0x0052FF },         /* SLICE */
+       { 0x005500, 0x007FFF },         /* SLICE */
+       { 0x008140, 0x00815F },         /* SLICE (0x8140-0x814F), DSS (0x8150-0x815F) */
+       { 0x0094D0, 0x00955F },         /* SLICE (0x94D0-0x951F), DSS (0x9520-0x955F) */
+       { 0x009680, 0x0096FF },         /* DSS */
+       { 0x00D800, 0x00D87F },         /* SLICE */
+       { 0x00DC00, 0x00DCFF },         /* SLICE */
+       { 0x00DE80, 0x00E8FF },         /* DSS (0xE000-0xE0FF reserved) */
+       {},
+};
+
+static const struct intel_mmio_range xelpmp_oaddrm_steering_table[] = {
+       { 0x393200, 0x39323F },
+       { 0x393400, 0x3934FF },
+       {},
+};
+
 void intel_gt_mcr_init(struct intel_gt *gt)
 {
        struct drm_i915_private *i915 = gt->i915;
+       unsigned long fuse;
+       int i;
 
        /*
         * An mslice is unavailable only if both the meml3 for the slice is
@@ -109,14 +159,36 @@ void intel_gt_mcr_init(struct intel_gt *gt)
                        drm_warn(&i915->drm, "mslice mask all zero!\n");
        }
 
-       if (IS_PONTEVECCHIO(i915)) {
+       if (MEDIA_VER(i915) >= 13 && gt->type == GT_MEDIA) {
+               gt->steering_table[OADDRM] = xelpmp_oaddrm_steering_table;
+       } else if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70)) {
+               fuse = REG_FIELD_GET(GT_L3_EXC_MASK,
+                                    intel_uncore_read(gt->uncore, XEHP_FUSE4));
+
+               /*
+                * Despite the register field being named "exclude mask" the
+                * bits actually represent enabled banks (two banks per bit).
+                */
+               for_each_set_bit(i, &fuse, 3)
+                       gt->info.l3bank_mask |= 0x3 << 2 * i;
+
+               gt->steering_table[INSTANCE0] = xelpg_instance0_steering_table;
+               gt->steering_table[L3BANK] = xelpg_l3bank_steering_table;
+               gt->steering_table[DSS] = xelpg_dss_steering_table;
+       } else if (IS_PONTEVECCHIO(i915)) {
                gt->steering_table[INSTANCE0] = pvc_instance0_steering_table;
        } else if (IS_DG2(i915)) {
                gt->steering_table[MSLICE] = xehpsdv_mslice_steering_table;
                gt->steering_table[LNCF] = dg2_lncf_steering_table;
+               /*
+                * No need to hook up the GAM table since it has a dedicated
+                * steering control register on DG2 and can use implicit
+                * steering.
+                */
        } else if (IS_XEHPSDV(i915)) {
                gt->steering_table[MSLICE] = xehpsdv_mslice_steering_table;
                gt->steering_table[LNCF] = xehpsdv_lncf_steering_table;
+               gt->steering_table[GAM] = xehpsdv_gam_steering_table;
        } else if (GRAPHICS_VER(i915) >= 11 &&
                   GRAPHICS_VER_FULL(i915) < IP_VER(12, 50)) {
                gt->steering_table[L3BANK] = icl_l3bank_steering_table;
@@ -134,6 +206,19 @@ void intel_gt_mcr_init(struct intel_gt *gt)
        }
 }
 
+/*
+ * Although the rest of the driver should use MCR-specific functions to
+ * read/write MCR registers, we still use the regular intel_uncore_* functions
+ * internally to implement those, so we need a way for the functions in this
+ * file to "cast" an i915_mcr_reg_t into an i915_reg_t.
+ */
+static i915_reg_t mcr_reg_cast(const i915_mcr_reg_t mcr)
+{
+       i915_reg_t r = { .reg = mcr.reg };
+
+       return r;
+}
+
 /*
  * rw_with_mcr_steering_fw - Access a register with specific MCR steering
  * @uncore: pointer to struct intel_uncore
@@ -148,14 +233,26 @@ void intel_gt_mcr_init(struct intel_gt *gt)
  * Caller needs to make sure the relevant forcewake wells are up.
  */
 static u32 rw_with_mcr_steering_fw(struct intel_uncore *uncore,
-                                  i915_reg_t reg, u8 rw_flag,
+                                  i915_mcr_reg_t reg, u8 rw_flag,
                                   int group, int instance, u32 value)
 {
        u32 mcr_mask, mcr_ss, mcr, old_mcr, val = 0;
 
        lockdep_assert_held(&uncore->lock);
 
-       if (GRAPHICS_VER(uncore->i915) >= 11) {
+       if (GRAPHICS_VER_FULL(uncore->i915) >= IP_VER(12, 70)) {
+               /*
+                * Always leave the hardware in multicast mode when doing reads
+                * (see comment about Wa_22013088509 below) and only change it
+                * to unicast mode when doing writes of a specific instance.
+                *
+                * No need to save old steering reg value.
+                */
+               intel_uncore_write_fw(uncore, MTL_MCR_SELECTOR,
+                                     REG_FIELD_PREP(MTL_MCR_GROUPID, group) |
+                                     REG_FIELD_PREP(MTL_MCR_INSTANCEID, instance) |
+                                     (rw_flag == FW_REG_READ ? GEN11_MCR_MULTICAST : 0));
+       } else if (GRAPHICS_VER(uncore->i915) >= 11) {
                mcr_mask = GEN11_MCR_SLICE_MASK | GEN11_MCR_SUBSLICE_MASK;
                mcr_ss = GEN11_MCR_SLICE(group) | GEN11_MCR_SUBSLICE(instance);
 
@@ -173,39 +270,53 @@ static u32 rw_with_mcr_steering_fw(struct intel_uncore *uncore,
                 */
                if (rw_flag == FW_REG_WRITE)
                        mcr_mask |= GEN11_MCR_MULTICAST;
+
+               mcr = intel_uncore_read_fw(uncore, GEN8_MCR_SELECTOR);
+               old_mcr = mcr;
+
+               mcr &= ~mcr_mask;
+               mcr |= mcr_ss;
+               intel_uncore_write_fw(uncore, GEN8_MCR_SELECTOR, mcr);
        } else {
                mcr_mask = GEN8_MCR_SLICE_MASK | GEN8_MCR_SUBSLICE_MASK;
                mcr_ss = GEN8_MCR_SLICE(group) | GEN8_MCR_SUBSLICE(instance);
-       }
 
-       old_mcr = mcr = intel_uncore_read_fw(uncore, GEN8_MCR_SELECTOR);
+               mcr = intel_uncore_read_fw(uncore, GEN8_MCR_SELECTOR);
+               old_mcr = mcr;
 
-       mcr &= ~mcr_mask;
-       mcr |= mcr_ss;
-       intel_uncore_write_fw(uncore, GEN8_MCR_SELECTOR, mcr);
+               mcr &= ~mcr_mask;
+               mcr |= mcr_ss;
+               intel_uncore_write_fw(uncore, GEN8_MCR_SELECTOR, mcr);
+       }
 
        if (rw_flag == FW_REG_READ)
-               val = intel_uncore_read_fw(uncore, reg);
+               val = intel_uncore_read_fw(uncore, mcr_reg_cast(reg));
        else
-               intel_uncore_write_fw(uncore, reg, value);
-
-       mcr &= ~mcr_mask;
-       mcr |= old_mcr & mcr_mask;
+               intel_uncore_write_fw(uncore, mcr_reg_cast(reg), value);
 
-       intel_uncore_write_fw(uncore, GEN8_MCR_SELECTOR, mcr);
+       /*
+        * For pre-MTL platforms, we need to restore the old value of the
+        * steering control register to ensure that implicit steering continues
+        * to behave as expected.  For MTL and beyond, we need only reinstate
+        * the 'multicast' bit (and only if we did a write that cleared it).
+        */
+       if (GRAPHICS_VER_FULL(uncore->i915) >= IP_VER(12, 70) && rw_flag == FW_REG_WRITE)
+               intel_uncore_write_fw(uncore, MTL_MCR_SELECTOR, GEN11_MCR_MULTICAST);
+       else if (GRAPHICS_VER_FULL(uncore->i915) < IP_VER(12, 70))
+               intel_uncore_write_fw(uncore, GEN8_MCR_SELECTOR, old_mcr);
 
        return val;
 }
 
 static u32 rw_with_mcr_steering(struct intel_uncore *uncore,
-                               i915_reg_t reg, u8 rw_flag,
+                               i915_mcr_reg_t reg, u8 rw_flag,
                                int group, int instance,
                                u32 value)
 {
        enum forcewake_domains fw_domains;
        u32 val;
 
-       fw_domains = intel_uncore_forcewake_for_reg(uncore, reg,
+       fw_domains = intel_uncore_forcewake_for_reg(uncore, mcr_reg_cast(reg),
                                                    rw_flag);
        fw_domains |= intel_uncore_forcewake_for_reg(uncore,
                                                     GEN8_MCR_SELECTOR,
@@ -233,7 +344,7 @@ static u32 rw_with_mcr_steering(struct intel_uncore *uncore,
  * group/instance.
  */
 u32 intel_gt_mcr_read(struct intel_gt *gt,
-                     i915_reg_t reg,
+                     i915_mcr_reg_t reg,
                      int group, int instance)
 {
        return rw_with_mcr_steering(gt->uncore, reg, FW_REG_READ, group, instance, 0);
@@ -250,7 +361,7 @@ u32 intel_gt_mcr_read(struct intel_gt *gt,
  * Write an MCR register in unicast mode after steering toward a specific
  * group/instance.
  */
-void intel_gt_mcr_unicast_write(struct intel_gt *gt, i915_reg_t reg, u32 value,
+void intel_gt_mcr_unicast_write(struct intel_gt *gt, i915_mcr_reg_t reg, u32 value,
                                int group, int instance)
 {
        rw_with_mcr_steering(gt->uncore, reg, FW_REG_WRITE, group, instance, value);
@@ -265,9 +376,16 @@ void intel_gt_mcr_unicast_write(struct intel_gt *gt, i915_reg_t reg, u32 value,
  * Write an MCR register in multicast mode to update all instances.
  */
 void intel_gt_mcr_multicast_write(struct intel_gt *gt,
-                               i915_reg_t reg, u32 value)
+                                 i915_mcr_reg_t reg, u32 value)
 {
-       intel_uncore_write(gt->uncore, reg, value);
+       /*
+        * Ensure we have multicast behavior, just in case some non-i915 agent
+        * left the hardware in unicast mode.
+        */
+       if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 70))
+               intel_uncore_write_fw(gt->uncore, MTL_MCR_SELECTOR, GEN11_MCR_MULTICAST);
+
+       intel_uncore_write(gt->uncore, mcr_reg_cast(reg), value);
 }
 
 /**
@@ -281,9 +399,44 @@ void intel_gt_mcr_multicast_write(struct intel_gt *gt,
  * domains; use intel_gt_mcr_multicast_write() in cases where forcewake should
  * be obtained automatically.
  */
-void intel_gt_mcr_multicast_write_fw(struct intel_gt *gt, i915_reg_t reg, u32 value)
+void intel_gt_mcr_multicast_write_fw(struct intel_gt *gt, i915_mcr_reg_t reg, u32 value)
+{
+       /*
+        * Ensure we have multicast behavior, just in case some non-i915 agent
+        * left the hardware in unicast mode.
+        */
+       if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 70))
+               intel_uncore_write_fw(gt->uncore, MTL_MCR_SELECTOR, GEN11_MCR_MULTICAST);
+
+       intel_uncore_write_fw(gt->uncore, mcr_reg_cast(reg), value);
+}
+
+/**
+ * intel_gt_mcr_multicast_rmw - Perform a multicast RMW operation
+ * @gt: GT structure
+ * @reg: the MCR register to read and write
+ * @clear: bits to clear during RMW
+ * @set: bits to set during RMW
+ *
+ * Performs a read-modify-write on an MCR register in a multicast manner.
+ * This operation only makes sense on MCR registers where all instances are
+ * expected to have the same value.  The read will target any non-terminated
+ * instance and the write will be applied to all instances.
+ *
+ * This function obtains any necessary forcewake domains through the MCR
+ * read/write helpers it calls internally, so the caller does not need to
+ * hold forcewake across the call.
+ *
+ * Returns the old (unmodified) value read.
+ */
+u32 intel_gt_mcr_multicast_rmw(struct intel_gt *gt, i915_mcr_reg_t reg,
+                              u32 clear, u32 set)
 {
-       intel_uncore_write_fw(gt->uncore, reg, value);
+       u32 val = intel_gt_mcr_read_any(gt, reg);
+
+       intel_gt_mcr_multicast_write(gt, reg, (val & ~clear) | set);
+
+       return val;
 }
 
 /*
@@ -301,7 +454,7 @@ void intel_gt_mcr_multicast_write_fw(struct intel_gt *gt, i915_reg_t reg, u32 va
  * for @type steering too.
  */
 static bool reg_needs_read_steering(struct intel_gt *gt,
-                                   i915_reg_t reg,
+                                   i915_mcr_reg_t reg,
                                    enum intel_steering_type type)
 {
        const u32 offset = i915_mmio_reg_offset(reg);
@@ -332,6 +485,8 @@ static void get_nonterminated_steering(struct intel_gt *gt,
                                       enum intel_steering_type type,
                                       u8 *group, u8 *instance)
 {
+       u32 dss;
+
        switch (type) {
        case L3BANK:
                *group = 0;             /* unused */
@@ -351,6 +506,15 @@ static void get_nonterminated_steering(struct intel_gt *gt,
                *group = __ffs(gt->info.mslice_mask) << 1;
                *instance = 0;  /* unused */
                break;
+       case GAM:
+               *group = IS_DG2(gt->i915) ? 1 : 0;
+               *instance = 0;
+               break;
+       case DSS:
+               dss = intel_sseu_find_first_xehp_dss(&gt->info.sseu, 0, 0);
+               *group = dss / GEN_DSS_PER_GSLICE;
+               *instance = dss % GEN_DSS_PER_GSLICE;
+               break;
        case INSTANCE0:
                /*
                 * There are a lot of MCR types for which instance (0, 0)
@@ -359,6 +523,13 @@ static void get_nonterminated_steering(struct intel_gt *gt,
                *group = 0;
                *instance = 0;
                break;
+       case OADDRM:
+               if ((VDBOX_MASK(gt) | VEBOX_MASK(gt) | gt->info.sfc_mask) & BIT(0))
+                       *group = 0;
+               else
+                       *group = 1;
+               *instance = 0;
+               break;
        default:
                MISSING_CASE(type);
                *group = 0;
@@ -380,7 +551,7 @@ static void get_nonterminated_steering(struct intel_gt *gt,
  * steering.
  */
 void intel_gt_mcr_get_nonterminated_steering(struct intel_gt *gt,
-                                            i915_reg_t reg,
+                                            i915_mcr_reg_t reg,
                                             u8 *group, u8 *instance)
 {
        int type;
@@ -409,7 +580,7 @@ void intel_gt_mcr_get_nonterminated_steering(struct intel_gt *gt,
  *
  * Returns the value from a non-terminated instance of @reg.
  */
-u32 intel_gt_mcr_read_any_fw(struct intel_gt *gt, i915_reg_t reg)
+u32 intel_gt_mcr_read_any_fw(struct intel_gt *gt, i915_mcr_reg_t reg)
 {
        int type;
        u8 group, instance;
@@ -423,7 +594,7 @@ u32 intel_gt_mcr_read_any_fw(struct intel_gt *gt, i915_reg_t reg)
                }
        }
 
-       return intel_uncore_read_fw(gt->uncore, reg);
+       return intel_uncore_read_fw(gt->uncore, mcr_reg_cast(reg));
 }
 
 /**
@@ -436,7 +607,7 @@ u32 intel_gt_mcr_read_any_fw(struct intel_gt *gt, i915_reg_t reg)
  *
  * Returns the value from a non-terminated instance of @reg.
  */
-u32 intel_gt_mcr_read_any(struct intel_gt *gt, i915_reg_t reg)
+u32 intel_gt_mcr_read_any(struct intel_gt *gt, i915_mcr_reg_t reg)
 {
        int type;
        u8 group, instance;
@@ -450,7 +621,7 @@ u32 intel_gt_mcr_read_any(struct intel_gt *gt, i915_reg_t reg)
                }
        }
 
-       return intel_uncore_read(gt->uncore, reg);
+       return intel_uncore_read(gt->uncore, mcr_reg_cast(reg));
 }
 
 static void report_steering_type(struct drm_printer *p,
@@ -483,11 +654,20 @@ static void report_steering_type(struct drm_printer *p,
 void intel_gt_mcr_report_steering(struct drm_printer *p, struct intel_gt *gt,
                                  bool dump_table)
 {
-       drm_printf(p, "Default steering: group=0x%x, instance=0x%x\n",
-                  gt->default_steering.groupid,
-                  gt->default_steering.instanceid);
-
-       if (IS_PONTEVECCHIO(gt->i915)) {
+       /*
+        * Starting with MTL we no longer have default steering;
+        * all ranges are explicitly steered.
+        */
+       if (GRAPHICS_VER_FULL(gt->i915) < IP_VER(12, 70))
+               drm_printf(p, "Default steering: group=0x%x, instance=0x%x\n",
+                          gt->default_steering.groupid,
+                          gt->default_steering.instanceid);
+
+       if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 70)) {
+               for (int i = 0; i < NUM_STEERING_TYPES; i++)
+                       if (gt->steering_table[i])
+                               report_steering_type(p, gt, i, dump_table);
+       } else if (IS_PONTEVECCHIO(gt->i915)) {
                report_steering_type(p, gt, INSTANCE0, dump_table);
        } else if (HAS_MSLICE_STEERING(gt->i915)) {
                report_steering_type(p, gt, MSLICE, dump_table);
@@ -520,3 +700,58 @@ void intel_gt_mcr_get_ss_steering(struct intel_gt *gt, unsigned int dss,
                return;
        }
 }
+
+/**
+ * intel_gt_mcr_wait_for_reg_fw - wait until MCR register matches expected state
+ * @gt: GT structure
+ * @reg: the register to read
+ * @mask: mask to apply to register value
+ * @value: value to wait for
+ * @fast_timeout_us: fast timeout in microseconds, for the atomic/tight wait
+ * @slow_timeout_ms: slow timeout in milliseconds
+ *
+ * This routine waits until the target register @reg contains the expected
+ * @value after applying the @mask, i.e. it waits until ::
+ *
+ *     (intel_gt_mcr_read_any_fw(gt, reg) & mask) == value
+ *
+ * Otherwise, the wait will timeout after @slow_timeout_ms milliseconds.
+ * For atomic context @slow_timeout_ms must be zero and @fast_timeout_us
+ * must not be larger than 20,000 microseconds.
+ *
+ * This function is basically an MCR-friendly version of
+ * __intel_wait_for_register_fw().  Generally this function will only be used
+ * on GAM registers, which are a bit special: although they're MCR registers,
+ * reads (e.g., waiting for status updates) are always directed to the primary
+ * instance.
+ *
+ * Note that this routine assumes the caller holds forcewake asserted; it is
+ * not suitable for very long waits.
+ *
+ * Return: 0 if the register matches the desired condition, or -ETIMEDOUT.
+ */
+int intel_gt_mcr_wait_for_reg_fw(struct intel_gt *gt,
+                                i915_mcr_reg_t reg,
+                                u32 mask,
+                                u32 value,
+                                unsigned int fast_timeout_us,
+                                unsigned int slow_timeout_ms)
+{
+       u32 reg_value = 0;
+#define done (((reg_value = intel_gt_mcr_read_any_fw(gt, reg)) & mask) == value)
+       int ret;
+
+       /* Catch any overuse of this function */
+       might_sleep_if(slow_timeout_ms);
+       GEM_BUG_ON(fast_timeout_us > 20000);
+       GEM_BUG_ON(!fast_timeout_us && !slow_timeout_ms);
+
+       ret = -ETIMEDOUT;
+       if (fast_timeout_us && fast_timeout_us <= 20000)
+               ret = _wait_for_atomic(done, fast_timeout_us, 0);
+       if (ret && slow_timeout_ms)
+               ret = wait_for(done, slow_timeout_ms);
+
+       return ret;
+#undef done
+}
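A rough standalone illustration (plain C with POSIX timing, not kernel code) of the two-stage wait that intel_gt_mcr_wait_for_reg_fw() builds from _wait_for_atomic() and wait_for(): a tight busy-poll bounded by the fast timeout for the common case, then a sleeping poll up to the slow timeout. The simulated register read stands in for intel_gt_mcr_read_any_fw().

#include <stdint.h>
#include <stdio.h>
#include <time.h>
#include <unistd.h>

/* Simulated MMIO read: the "busy" bit clears after a few polls. */
static uint32_t fake_mmio_read(void)
{
	static int calls;
	return ++calls > 3 ? 0 : 0x1;
}

static uint64_t now_us(void)
{
	struct timespec ts;

	clock_gettime(CLOCK_MONOTONIC, &ts);
	return (uint64_t)ts.tv_sec * 1000000 + ts.tv_nsec / 1000;
}

static int wait_for_reg(uint32_t mask, uint32_t value,
			unsigned int fast_us, unsigned int slow_ms)
{
	uint64_t start = now_us();

	/* Fast phase: tight busy-poll, no sleeping. */
	while (now_us() - start < fast_us)
		if ((fake_mmio_read() & mask) == value)
			return 0;

	/* Slow phase: poll with short sleeps up to slow_ms. */
	while (now_us() - start < (uint64_t)slow_ms * 1000) {
		if ((fake_mmio_read() & mask) == value)
			return 0;
		usleep(100);
	}

	return -1;	/* -ETIMEDOUT in the driver */
}

int main(void)
{
	/* Same numbers as TLB_INVAL_TIMEOUT_US / TLB_INVAL_TIMEOUT_MS above. */
	printf("wait result: %d\n", wait_for_reg(0x1, 0, 100, 4));
	return 0;
}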
index 77a8b11c287ddb7f004513db7038a029a3146cf2..3fb0502bff22d8bac617bddb93f1a8ffb735b261 100644 (file)
 void intel_gt_mcr_init(struct intel_gt *gt);
 
 u32 intel_gt_mcr_read(struct intel_gt *gt,
-                     i915_reg_t reg,
+                     i915_mcr_reg_t reg,
                      int group, int instance);
-u32 intel_gt_mcr_read_any_fw(struct intel_gt *gt, i915_reg_t reg);
-u32 intel_gt_mcr_read_any(struct intel_gt *gt, i915_reg_t reg);
+u32 intel_gt_mcr_read_any_fw(struct intel_gt *gt, i915_mcr_reg_t reg);
+u32 intel_gt_mcr_read_any(struct intel_gt *gt, i915_mcr_reg_t reg);
 
 void intel_gt_mcr_unicast_write(struct intel_gt *gt,
-                               i915_reg_t reg, u32 value,
+                               i915_mcr_reg_t reg, u32 value,
                                int group, int instance);
 void intel_gt_mcr_multicast_write(struct intel_gt *gt,
-                                 i915_reg_t reg, u32 value);
+                                 i915_mcr_reg_t reg, u32 value);
 void intel_gt_mcr_multicast_write_fw(struct intel_gt *gt,
-                                    i915_reg_t reg, u32 value);
+                                    i915_mcr_reg_t reg, u32 value);
+
+u32 intel_gt_mcr_multicast_rmw(struct intel_gt *gt, i915_mcr_reg_t reg,
+                              u32 clear, u32 set);
 
 void intel_gt_mcr_get_nonterminated_steering(struct intel_gt *gt,
-                                            i915_reg_t reg,
+                                            i915_mcr_reg_t reg,
                                             u8 *group, u8 *instance);
 
 void intel_gt_mcr_report_steering(struct drm_printer *p, struct intel_gt *gt,
@@ -34,6 +37,13 @@ void intel_gt_mcr_report_steering(struct drm_printer *p, struct intel_gt *gt,
 void intel_gt_mcr_get_ss_steering(struct intel_gt *gt, unsigned int dss,
                                  unsigned int *group, unsigned int *instance);
 
+int intel_gt_mcr_wait_for_reg_fw(struct intel_gt *gt,
+                                i915_mcr_reg_t reg,
+                                u32 mask,
+                                u32 value,
+                                unsigned int fast_timeout_us,
+                                unsigned int slow_timeout_ms);
+
 /*
  * Helper for for_each_ss_steering loop.  On pre-Xe_HP platforms, subslice
  * presence is determined by using the group/instance as direct lookups in the
index 108b9e76c32e874924bc1d6b32ff21dce484eff7..40d0a3be42acf910e40f4d11d0e64ea24931c17e 100644 (file)
@@ -344,162 +344,7 @@ void intel_gt_pm_frequency_dump(struct intel_gt *gt, struct drm_printer *p)
                drm_printf(p, "efficient (RPe) frequency: %d MHz\n",
                           intel_gpu_freq(rps, rps->efficient_freq));
        } else if (GRAPHICS_VER(i915) >= 6) {
-               u32 rp_state_limits;
-               u32 gt_perf_status;
-               struct intel_rps_freq_caps caps;
-               u32 rpmodectl, rpinclimit, rpdeclimit;
-               u32 rpstat, cagf, reqf;
-               u32 rpcurupei, rpcurup, rpprevup;
-               u32 rpcurdownei, rpcurdown, rpprevdown;
-               u32 rpupei, rpupt, rpdownei, rpdownt;
-               u32 pm_ier, pm_imr, pm_isr, pm_iir, pm_mask;
-
-               rp_state_limits = intel_uncore_read(uncore, GEN6_RP_STATE_LIMITS);
-               gen6_rps_get_freq_caps(rps, &caps);
-               if (IS_GEN9_LP(i915))
-                       gt_perf_status = intel_uncore_read(uncore, BXT_GT_PERF_STATUS);
-               else
-                       gt_perf_status = intel_uncore_read(uncore, GEN6_GT_PERF_STATUS);
-
-               /* RPSTAT1 is in the GT power well */
-               intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL);
-
-               reqf = intel_uncore_read(uncore, GEN6_RPNSWREQ);
-               if (GRAPHICS_VER(i915) >= 9) {
-                       reqf >>= 23;
-               } else {
-                       reqf &= ~GEN6_TURBO_DISABLE;
-                       if (IS_HASWELL(i915) || IS_BROADWELL(i915))
-                               reqf >>= 24;
-                       else
-                               reqf >>= 25;
-               }
-               reqf = intel_gpu_freq(rps, reqf);
-
-               rpmodectl = intel_uncore_read(uncore, GEN6_RP_CONTROL);
-               rpinclimit = intel_uncore_read(uncore, GEN6_RP_UP_THRESHOLD);
-               rpdeclimit = intel_uncore_read(uncore, GEN6_RP_DOWN_THRESHOLD);
-
-               rpstat = intel_uncore_read(uncore, GEN6_RPSTAT1);
-               rpcurupei = intel_uncore_read(uncore, GEN6_RP_CUR_UP_EI) & GEN6_CURICONT_MASK;
-               rpcurup = intel_uncore_read(uncore, GEN6_RP_CUR_UP) & GEN6_CURBSYTAVG_MASK;
-               rpprevup = intel_uncore_read(uncore, GEN6_RP_PREV_UP) & GEN6_CURBSYTAVG_MASK;
-               rpcurdownei = intel_uncore_read(uncore, GEN6_RP_CUR_DOWN_EI) & GEN6_CURIAVG_MASK;
-               rpcurdown = intel_uncore_read(uncore, GEN6_RP_CUR_DOWN) & GEN6_CURBSYTAVG_MASK;
-               rpprevdown = intel_uncore_read(uncore, GEN6_RP_PREV_DOWN) & GEN6_CURBSYTAVG_MASK;
-
-               rpupei = intel_uncore_read(uncore, GEN6_RP_UP_EI);
-               rpupt = intel_uncore_read(uncore, GEN6_RP_UP_THRESHOLD);
-
-               rpdownei = intel_uncore_read(uncore, GEN6_RP_DOWN_EI);
-               rpdownt = intel_uncore_read(uncore, GEN6_RP_DOWN_THRESHOLD);
-
-               cagf = intel_rps_read_actual_frequency(rps);
-
-               intel_uncore_forcewake_put(uncore, FORCEWAKE_ALL);
-
-               if (GRAPHICS_VER(i915) >= 11) {
-                       pm_ier = intel_uncore_read(uncore, GEN11_GPM_WGBOXPERF_INTR_ENABLE);
-                       pm_imr = intel_uncore_read(uncore, GEN11_GPM_WGBOXPERF_INTR_MASK);
-                       /*
-                        * The equivalent to the PM ISR & IIR cannot be read
-                        * without affecting the current state of the system
-                        */
-                       pm_isr = 0;
-                       pm_iir = 0;
-               } else if (GRAPHICS_VER(i915) >= 8) {
-                       pm_ier = intel_uncore_read(uncore, GEN8_GT_IER(2));
-                       pm_imr = intel_uncore_read(uncore, GEN8_GT_IMR(2));
-                       pm_isr = intel_uncore_read(uncore, GEN8_GT_ISR(2));
-                       pm_iir = intel_uncore_read(uncore, GEN8_GT_IIR(2));
-               } else {
-                       pm_ier = intel_uncore_read(uncore, GEN6_PMIER);
-                       pm_imr = intel_uncore_read(uncore, GEN6_PMIMR);
-                       pm_isr = intel_uncore_read(uncore, GEN6_PMISR);
-                       pm_iir = intel_uncore_read(uncore, GEN6_PMIIR);
-               }
-               pm_mask = intel_uncore_read(uncore, GEN6_PMINTRMSK);
-
-               drm_printf(p, "Video Turbo Mode: %s\n",
-                          str_yes_no(rpmodectl & GEN6_RP_MEDIA_TURBO));
-               drm_printf(p, "HW control enabled: %s\n",
-                          str_yes_no(rpmodectl & GEN6_RP_ENABLE));
-               drm_printf(p, "SW control enabled: %s\n",
-                          str_yes_no((rpmodectl & GEN6_RP_MEDIA_MODE_MASK) == GEN6_RP_MEDIA_SW_MODE));
-
-               drm_printf(p, "PM IER=0x%08x IMR=0x%08x, MASK=0x%08x\n",
-                          pm_ier, pm_imr, pm_mask);
-               if (GRAPHICS_VER(i915) <= 10)
-                       drm_printf(p, "PM ISR=0x%08x IIR=0x%08x\n",
-                                  pm_isr, pm_iir);
-               drm_printf(p, "pm_intrmsk_mbz: 0x%08x\n",
-                          rps->pm_intrmsk_mbz);
-               drm_printf(p, "GT_PERF_STATUS: 0x%08x\n", gt_perf_status);
-               drm_printf(p, "Render p-state ratio: %d\n",
-                          (gt_perf_status & (GRAPHICS_VER(i915) >= 9 ? 0x1ff00 : 0xff00)) >> 8);
-               drm_printf(p, "Render p-state VID: %d\n",
-                          gt_perf_status & 0xff);
-               drm_printf(p, "Render p-state limit: %d\n",
-                          rp_state_limits & 0xff);
-               drm_printf(p, "RPSTAT1: 0x%08x\n", rpstat);
-               drm_printf(p, "RPMODECTL: 0x%08x\n", rpmodectl);
-               drm_printf(p, "RPINCLIMIT: 0x%08x\n", rpinclimit);
-               drm_printf(p, "RPDECLIMIT: 0x%08x\n", rpdeclimit);
-               drm_printf(p, "RPNSWREQ: %dMHz\n", reqf);
-               drm_printf(p, "CAGF: %dMHz\n", cagf);
-               drm_printf(p, "RP CUR UP EI: %d (%lldns)\n",
-                          rpcurupei,
-                          intel_gt_pm_interval_to_ns(gt, rpcurupei));
-               drm_printf(p, "RP CUR UP: %d (%lldns)\n",
-                          rpcurup, intel_gt_pm_interval_to_ns(gt, rpcurup));
-               drm_printf(p, "RP PREV UP: %d (%lldns)\n",
-                          rpprevup, intel_gt_pm_interval_to_ns(gt, rpprevup));
-               drm_printf(p, "Up threshold: %d%%\n",
-                          rps->power.up_threshold);
-               drm_printf(p, "RP UP EI: %d (%lldns)\n",
-                          rpupei, intel_gt_pm_interval_to_ns(gt, rpupei));
-               drm_printf(p, "RP UP THRESHOLD: %d (%lldns)\n",
-                          rpupt, intel_gt_pm_interval_to_ns(gt, rpupt));
-
-               drm_printf(p, "RP CUR DOWN EI: %d (%lldns)\n",
-                          rpcurdownei,
-                          intel_gt_pm_interval_to_ns(gt, rpcurdownei));
-               drm_printf(p, "RP CUR DOWN: %d (%lldns)\n",
-                          rpcurdown,
-                          intel_gt_pm_interval_to_ns(gt, rpcurdown));
-               drm_printf(p, "RP PREV DOWN: %d (%lldns)\n",
-                          rpprevdown,
-                          intel_gt_pm_interval_to_ns(gt, rpprevdown));
-               drm_printf(p, "Down threshold: %d%%\n",
-                          rps->power.down_threshold);
-               drm_printf(p, "RP DOWN EI: %d (%lldns)\n",
-                          rpdownei, intel_gt_pm_interval_to_ns(gt, rpdownei));
-               drm_printf(p, "RP DOWN THRESHOLD: %d (%lldns)\n",
-                          rpdownt, intel_gt_pm_interval_to_ns(gt, rpdownt));
-
-               drm_printf(p, "Lowest (RPN) frequency: %dMHz\n",
-                          intel_gpu_freq(rps, caps.min_freq));
-               drm_printf(p, "Nominal (RP1) frequency: %dMHz\n",
-                          intel_gpu_freq(rps, caps.rp1_freq));
-               drm_printf(p, "Max non-overclocked (RP0) frequency: %dMHz\n",
-                          intel_gpu_freq(rps, caps.rp0_freq));
-               drm_printf(p, "Max overclocked frequency: %dMHz\n",
-                          intel_gpu_freq(rps, rps->max_freq));
-
-               drm_printf(p, "Current freq: %d MHz\n",
-                          intel_gpu_freq(rps, rps->cur_freq));
-               drm_printf(p, "Actual freq: %d MHz\n", cagf);
-               drm_printf(p, "Idle freq: %d MHz\n",
-                          intel_gpu_freq(rps, rps->idle_freq));
-               drm_printf(p, "Min freq: %d MHz\n",
-                          intel_gpu_freq(rps, rps->min_freq));
-               drm_printf(p, "Boost freq: %d MHz\n",
-                          intel_gpu_freq(rps, rps->boost_freq));
-               drm_printf(p, "Max freq: %d MHz\n",
-                          intel_gpu_freq(rps, rps->max_freq));
-               drm_printf(p,
-                          "efficient (RPe) frequency: %d MHz\n",
-                          intel_gpu_freq(rps, rps->efficient_freq));
+               gen6_rps_frequency_dump(rps, p);
        } else {
                drm_puts(p, "no P-state info available\n");
        }
@@ -655,6 +500,44 @@ static bool rps_eval(void *data)
 
 DEFINE_INTEL_GT_DEBUGFS_ATTRIBUTE(rps_boost);
 
+static int perf_limit_reasons_get(void *data, u64 *val)
+{
+       struct intel_gt *gt = data;
+       intel_wakeref_t wakeref;
+
+       with_intel_runtime_pm(gt->uncore->rpm, wakeref)
+               *val = intel_uncore_read(gt->uncore, intel_gt_perf_limit_reasons_reg(gt));
+
+       return 0;
+}
+
+static int perf_limit_reasons_clear(void *data, u64 val)
+{
+       struct intel_gt *gt = data;
+       intel_wakeref_t wakeref;
+
+       /*
+        * Clear the upper 16 "log" bits; the lower 16 "status" bits are
+        * read-only. The upper 16 "log" bits are identical to the lower 16
+        * "status" bits except that the "log" bits remain set until cleared.
+        */
+       with_intel_runtime_pm(gt->uncore->rpm, wakeref)
+               intel_uncore_rmw(gt->uncore, intel_gt_perf_limit_reasons_reg(gt),
+                                GT0_PERF_LIMIT_REASONS_LOG_MASK, 0);
+
+       return 0;
+}
+
+static bool perf_limit_reasons_eval(void *data)
+{
+       struct intel_gt *gt = data;
+
+       return i915_mmio_reg_valid(intel_gt_perf_limit_reasons_reg(gt));
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(perf_limit_reasons_fops, perf_limit_reasons_get,
+                       perf_limit_reasons_clear, "%llu\n");
+
 void intel_gt_pm_debugfs_register(struct intel_gt *gt, struct dentry *root)
 {
        static const struct intel_gt_debugfs_file files[] = {
@@ -664,6 +547,7 @@ void intel_gt_pm_debugfs_register(struct intel_gt *gt, struct dentry *root)
                { "forcewake_user", &forcewake_user_fops, NULL},
                { "llc", &llc_fops, llc_eval },
                { "rps_boost", &rps_boost_fops, rps_eval },
+               { "perf_limit_reasons", &perf_limit_reasons_fops, perf_limit_reasons_eval },
        };
 
        intel_gt_debugfs_register_files(root, files, ARRAY_SIZE(files), gt);
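For completeness, a small userspace sketch of how the new debugfs attribute could be exercised; the path below is an assumption about the usual dri/<card>/gt<N> debugfs layout, not something defined by this patch. Reading returns the "%llu"-formatted perf limit reasons value; writing any number invokes perf_limit_reasons_clear(), which touches only the sticky upper 16 "log" bits.

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	/* Assumed path; the real location depends on the debugfs layout. */
	const char *path = "/sys/kernel/debug/dri/0/gt0/perf_limit_reasons";
	char buf[32];
	ssize_t n;
	int fd;

	fd = open(path, O_RDWR);
	if (fd < 0) {
		perror("open");
		return 1;
	}

	n = read(fd, buf, sizeof(buf) - 1);	/* decimal register value + '\n' */
	if (n > 0) {
		buf[n] = '\0';
		printf("perf_limit_reasons: %s", buf);
	}

	(void)write(fd, "0\n", 2);		/* any write clears the log bits */
	close(fd);
	return 0;
}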
index 5a3a25838fff2706771616b6c770eafdc4fe6b87..70177d3f2e945b58a509b35fc40c325628cad6e3 100644 (file)
@@ -8,6 +8,19 @@
 
 #include "i915_reg_defs.h"
 
+#define MCR_REG(offset)        ((const i915_mcr_reg_t){ .reg = (offset) })
+
+/*
+ * The perf control registers are technically multicast registers, but the
+ * driver never needs to read/write them directly; we only use them to build
+ * lists of registers (where they're mixed in with other non-MCR registers)
+ * and then operate on the offset directly.  For now we'll just define them
+ * as non-multicast so we can place them on the same list, but we may want
+ * to try to come up with a better way to handle heterogeneous lists of
+ * registers in the future.
+ */
+#define PERF_REG(offset)                       _MMIO(offset)
+
 /* RPM unit config (Gen8+) */
 #define RPM_CONFIG0                            _MMIO(0xd00)
 #define   GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_SHIFT    3
 #define FORCEWAKE_ACK_RENDER_GEN9              _MMIO(0xd84)
 #define FORCEWAKE_ACK_MEDIA_GEN9               _MMIO(0xd88)
 
+#define FORCEWAKE_ACK_GSC                      _MMIO(0xdf8)
+#define FORCEWAKE_ACK_GT_MTL                   _MMIO(0xdfc)
+
 #define GMD_ID_GRAPHICS                                _MMIO(0xd8c)
 #define GMD_ID_MEDIA                           _MMIO(MTL_MEDIA_GSI_BASE + 0xd8c)
 
 #define MCFG_MCR_SELECTOR                      _MMIO(0xfd0)
+#define MTL_MCR_SELECTOR                       _MMIO(0xfd4)
 #define SF_MCR_SELECTOR                                _MMIO(0xfd8)
 #define GEN8_MCR_SELECTOR                      _MMIO(0xfdc)
+#define GAM_MCR_SELECTOR                       _MMIO(0xfe0)
 #define   GEN8_MCR_SLICE(slice)                        (((slice) & 3) << 26)
 #define   GEN8_MCR_SLICE_MASK                  GEN8_MCR_SLICE(3)
 #define   GEN8_MCR_SUBSLICE(subslice)          (((subslice) & 3) << 24)
@@ -54,6 +72,8 @@
 #define   GEN11_MCR_SLICE_MASK                 GEN11_MCR_SLICE(0xf)
 #define   GEN11_MCR_SUBSLICE(subslice)         (((subslice) & 0x7) << 24)
 #define   GEN11_MCR_SUBSLICE_MASK              GEN11_MCR_SUBSLICE(0x7)
+#define   MTL_MCR_GROUPID                      REG_GENMASK(11, 8)
+#define   MTL_MCR_INSTANCEID                   REG_GENMASK(3, 0)
 
 #define IPEIR_I965                             _MMIO(0x2064)
 #define IPEHR_I965                             _MMIO(0x2068)
 #define GEN7_TLB_RD_ADDR                       _MMIO(0x4700)
 
 #define GEN12_PAT_INDEX(index)                 _MMIO(0x4800 + (index) * 4)
+#define XEHP_PAT_INDEX(index)                  MCR_REG(0x4800 + (index) * 4)
 
-#define XEHP_TILE0_ADDR_RANGE                  _MMIO(0x4900)
+#define XEHP_TILE0_ADDR_RANGE                  MCR_REG(0x4900)
 #define   XEHP_TILE_LMEM_RANGE_SHIFT           8
 
-#define XEHP_FLAT_CCS_BASE_ADDR                        _MMIO(0x4910)
+#define XEHP_FLAT_CCS_BASE_ADDR                        MCR_REG(0x4910)
 #define   XEHP_CCS_BASE_SHIFT                  8
 
 #define GAMTARBMODE                            _MMIO(0x4a08)
 #define CHICKEN_RASTER_2                       _MMIO(0x6208)
 #define   TBIMR_FAST_CLIP                      REG_BIT(5)
 
-#define VFLSKPD                                        _MMIO(0x62a8)
+#define VFLSKPD                                        MCR_REG(0x62a8)
 #define   DIS_OVER_FETCH_CACHE                 REG_BIT(1)
 #define   DIS_MULT_MISS_RD_SQUASH              REG_BIT(0)
 
-#define FF_MODE2                               _MMIO(0x6604)
+#define GEN12_FF_MODE2                         _MMIO(0x6604)
+#define XEHP_FF_MODE2                          MCR_REG(0x6604)
 #define   FF_MODE2_GS_TIMER_MASK               REG_GENMASK(31, 24)
 #define   FF_MODE2_GS_TIMER_224                        REG_FIELD_PREP(FF_MODE2_GS_TIMER_MASK, 224)
 #define   FF_MODE2_TDS_TIMER_MASK              REG_GENMASK(23, 16)
 #define   FF_MODE2_TDS_TIMER_128               REG_FIELD_PREP(FF_MODE2_TDS_TIMER_MASK, 4)
 
-#define XEHPG_INSTDONE_GEOM_SVG                        _MMIO(0x666c)
+#define XEHPG_INSTDONE_GEOM_SVG                        MCR_REG(0x666c)
 
 #define CACHE_MODE_0_GEN7                      _MMIO(0x7000) /* IVB+ */
 #define   RC_OP_FLUSH_ENABLE                   (1 << 0)
 #define HIZ_CHICKEN                            _MMIO(0x7018)
 #define   CHV_HZ_8X8_MODE_IN_1X                        REG_BIT(15)
 #define   DG1_HZ_READ_SUPPRESSION_OPTIMIZATION_DISABLE REG_BIT(14)
+#define   HZ_DEPTH_TEST_LE_GE_OPT_DISABLE      REG_BIT(13)
 #define   BDW_HIZ_POWER_COMPILER_CLOCK_GATING_DISABLE  REG_BIT(3)
 
 #define GEN8_L3CNTLREG                         _MMIO(0x7034)
 #define GEN8_HDC_CHICKEN1                      _MMIO(0x7304)
 
 #define GEN11_COMMON_SLICE_CHICKEN3            _MMIO(0x7304)
+#define XEHP_COMMON_SLICE_CHICKEN3             MCR_REG(0x7304)
 #define   DG1_FLOAT_POINT_BLEND_OPT_STRICT_MODE_EN     REG_BIT(12)
 #define   XEHP_DUAL_SIMD8_SEQ_MERGE_DISABLE    REG_BIT(12)
 #define   GEN11_BLEND_EMB_FIX_DISABLE_IN_RCC   REG_BIT(11)
 #define   GEN12_DISABLE_CPS_AWARE_COLOR_PIPE   REG_BIT(9)
 
-/* GEN9 chicken */
-#define SLICE_ECO_CHICKEN0                     _MMIO(0x7308)
-#define   PIXEL_MASK_CAMMING_DISABLE           (1 << 14)
-
-#define GEN9_SLICE_COMMON_ECO_CHICKEN0         _MMIO(0x7308)
-#define   DISABLE_PIXEL_MASK_CAMMING           (1 << 14)
-
 #define GEN9_SLICE_COMMON_ECO_CHICKEN1         _MMIO(0x731c)
-#define   GEN11_STATE_CACHE_REDIRECT_TO_CS     (1 << 11)
-
-#define SLICE_COMMON_ECO_CHICKEN1              _MMIO(0x731c)
+#define XEHP_SLICE_COMMON_ECO_CHICKEN1         MCR_REG(0x731c)
 #define   MSC_MSAA_REODER_BUF_BYPASS_DISABLE   REG_BIT(14)
+#define   GEN11_STATE_CACHE_REDIRECT_TO_CS     (1 << 11)
 
 #define GEN9_SLICE_PGCTL_ACK(slice)            _MMIO(0x804c + (slice) * 0x4)
 #define GEN10_SLICE_PGCTL_ACK(slice)           _MMIO(0x804c + ((slice) / 3) * 0x34 + \
 #define VF_PREEMPTION                          _MMIO(0x83a4)
 #define   PREEMPTION_VERTEX_COUNT              REG_GENMASK(15, 0)
 
+#define VFG_PREEMPTION_CHICKEN                 _MMIO(0x83b4)
+#define   POLYGON_TRIFAN_LINELOOP_DISABLE      REG_BIT(4)
+
 #define GEN8_RC6_CTX_INFO                      _MMIO(0x8504)
 
-#define GEN12_SQCM                             _MMIO(0x8724)
+#define XEHP_SQCM                              MCR_REG(0x8724)
 #define   EN_32B_ACCESS                                REG_BIT(30)
 
 #define HSW_IDICR                              _MMIO(0x9008)
 #define   GEN6_MBCTL_BOOT_FETCH_MECH           (1 << 0)
 
 /* Fuse readout registers for GT */
+#define XEHP_FUSE4                             _MMIO(0x9114)
+#define   GT_L3_EXC_MASK                       REG_GENMASK(6, 4)
 #define        GEN10_MIRROR_FUSE3                      _MMIO(0x9118)
 #define   GEN10_L3BANK_PAIR_COUNT              4
 #define   GEN10_L3BANK_MASK                    0x0F
 
 #define GEN7_MISCCPCTL                         _MMIO(0x9424)
 #define   GEN7_DOP_CLOCK_GATE_ENABLE           (1 << 0)
+
+#define GEN8_MISCCPCTL                         MCR_REG(0x9424)
+#define   GEN8_DOP_CLOCK_GATE_ENABLE           REG_BIT(0)
 #define   GEN12_DOP_CLOCK_GATE_RENDER_ENABLE   REG_BIT(1)
 #define   GEN8_DOP_CLOCK_GATE_CFCLK_ENABLE     (1 << 2)
 #define   GEN8_DOP_CLOCK_GATE_GUC_ENABLE       (1 << 4)
 #define   GAMTLBVEBOX0_CLKGATE_DIS             REG_BIT(16)
 #define   LTCDD_CLKGATE_DIS                    REG_BIT(10)
 
-#define SLICE_UNIT_LEVEL_CLKGATE               _MMIO(0x94d4)
+#define GEN11_SLICE_UNIT_LEVEL_CLKGATE         _MMIO(0x94d4)
+#define XEHP_SLICE_UNIT_LEVEL_CLKGATE          MCR_REG(0x94d4)
 #define   SARBUNIT_CLKGATE_DIS                 (1 << 5)
 #define   RCCUNIT_CLKGATE_DIS                  (1 << 7)
 #define   MSCUNIT_CLKGATE_DIS                  (1 << 10)
 #define   L3_CLKGATE_DIS                       REG_BIT(16)
 #define   L3_CR2X_CLKGATE_DIS                  REG_BIT(17)
 
-#define SCCGCTL94DC                            _MMIO(0x94dc)
+#define SCCGCTL94DC                            MCR_REG(0x94dc)
 #define   CG3DDISURB                           REG_BIT(14)
 
 #define UNSLICE_UNIT_LEVEL_CLKGATE2            _MMIO(0x94e4)
 #define   VSUNIT_CLKGATE_DIS_TGL               REG_BIT(19)
 #define   PSDUNIT_CLKGATE_DIS                  REG_BIT(5)
 
-#define SUBSLICE_UNIT_LEVEL_CLKGATE            _MMIO(0x9524)
+#define GEN11_SUBSLICE_UNIT_LEVEL_CLKGATE      MCR_REG(0x9524)
 #define   DSS_ROUTER_CLKGATE_DIS               REG_BIT(28)
 #define   GWUNIT_CLKGATE_DIS                   REG_BIT(16)
 
-#define SUBSLICE_UNIT_LEVEL_CLKGATE2           _MMIO(0x9528)
+#define SUBSLICE_UNIT_LEVEL_CLKGATE2           MCR_REG(0x9528)
 #define   CPSSUNIT_CLKGATE_DIS                 REG_BIT(9)
 
-#define SSMCGCTL9530                           _MMIO(0x9530)
+#define SSMCGCTL9530                           MCR_REG(0x9530)
 #define   RTFUNIT_CLKGATE_DIS                  REG_BIT(18)
 
-#define GEN10_DFR_RATIO_EN_AND_CHICKEN         _MMIO(0x9550)
+#define GEN10_DFR_RATIO_EN_AND_CHICKEN         MCR_REG(0x9550)
 #define   DFR_DISABLE                          (1 << 9)
 
-#define INF_UNIT_LEVEL_CLKGATE                 _MMIO(0x9560)
+#define INF_UNIT_LEVEL_CLKGATE                 MCR_REG(0x9560)
 #define   CGPSF_CLKGATE_DIS                    (1 << 3)
 
 #define MICRO_BP0_0                            _MMIO(0x9800)
 #define FORCEWAKE_MEDIA_VDBOX_GEN11(n)         _MMIO(0xa540 + (n) * 4)
 #define FORCEWAKE_MEDIA_VEBOX_GEN11(n)         _MMIO(0xa560 + (n) * 4)
 
+#define FORCEWAKE_REQ_GSC                      _MMIO(0xa618)
+
 #define CHV_POWER_SS0_SIG1                     _MMIO(0xa720)
 #define CHV_POWER_SS0_SIG2                     _MMIO(0xa724)
 #define CHV_POWER_SS1_SIG1                     _MMIO(0xa728)
 
 /* MOCS (Memory Object Control State) registers */
 #define GEN9_LNCFCMOCS(i)                      _MMIO(0xb020 + (i) * 4) /* L3 Cache Control */
-#define GEN9_LNCFCMOCS_REG_COUNT               32
+#define XEHP_LNCFCMOCS(i)                      MCR_REG(0xb020 + (i) * 4)
+#define LNCFCMOCS_REG_COUNT                    32
 
 #define GEN7_L3CNTLREG3                                _MMIO(0xb024)
 
 #define GEN7_L3LOG(slice, i)                   _MMIO(0xb070 + (slice) * 0x200 + (i) * 4)
 #define   GEN7_L3LOG_SIZE                      0x80
 
-#define GEN10_SCRATCH_LNCF2                    _MMIO(0xb0a0)
-#define   PMFLUSHDONE_LNICRSDROP               (1 << 20)
-#define   PMFLUSH_GAPL3UNBLOCK                 (1 << 21)
-#define   PMFLUSHDONE_LNEBLK                   (1 << 22)
-
-#define XEHP_L3NODEARBCFG                      _MMIO(0xb0b4)
+#define XEHP_L3NODEARBCFG                      MCR_REG(0xb0b4)
 #define   XEHP_LNESPARE                                REG_BIT(19)
 
-#define GEN8_L3SQCREG1                         _MMIO(0xb100)
+#define GEN8_L3SQCREG1                         MCR_REG(0xb100)
 /*
  * Note that on CHV the following has an off-by-one error wrt. to BSpec.
  * Using the formula in BSpec leads to a hang, while the formula here works
 #define   L3_HIGH_PRIO_CREDITS(x)              (((x) >> 1) << 14)
 #define   L3_PRIO_CREDITS_MASK                 ((0x1f << 19) | (0x1f << 14))
 
-#define GEN10_L3_CHICKEN_MODE_REGISTER         _MMIO(0xb114)
-#define   GEN11_I2M_WRITE_DISABLE              (1 << 28)
-
-#define GEN8_L3SQCREG4                         _MMIO(0xb118)
+#define GEN8_L3SQCREG4                         MCR_REG(0xb118)
 #define   GEN11_LQSC_CLEAN_EVICT_DISABLE       (1 << 6)
 #define   GEN8_LQSC_RO_PERF_DIS                        (1 << 27)
 #define   GEN8_LQSC_FLUSH_COHERENT_LINES       (1 << 21)
 #define   GEN8_LQSQ_NONIA_COHERENT_ATOMICS_ENABLE      REG_BIT(22)
 
-#define GEN9_SCRATCH1                          _MMIO(0xb11c)
+#define GEN9_SCRATCH1                          MCR_REG(0xb11c)
 #define   EVICTION_PERF_FIX_ENABLE             REG_BIT(8)
 
-#define BDW_SCRATCH1                           _MMIO(0xb11c)
+#define BDW_SCRATCH1                           MCR_REG(0xb11c)
 #define   GEN9_LBS_SLA_RETRY_TIMER_DECREMENT_ENABLE    (1 << 2)
 
-#define GEN11_SCRATCH2                         _MMIO(0xb140)
+#define GEN11_SCRATCH2                         MCR_REG(0xb140)
 #define   GEN11_COHERENT_PARTIAL_WRITE_MERGE_ENABLE    (1 << 19)
 
-#define GEN11_L3SQCREG5                                _MMIO(0xb158)
+#define XEHP_L3SQCREG5                         MCR_REG(0xb158)
 #define   L3_PWM_TIMER_INIT_VAL_MASK           REG_GENMASK(9, 0)
 
-#define MLTICTXCTL                             _MMIO(0xb170)
+#define MLTICTXCTL                             MCR_REG(0xb170)
 #define   TDONRENDER                           REG_BIT(2)
 
-#define XEHP_L3SCQREG7                         _MMIO(0xb188)
+#define XEHP_L3SCQREG7                         MCR_REG(0xb188)
 #define   BLEND_FILL_CACHING_OPT_DIS           REG_BIT(3)
 
 #define XEHPC_L3SCRUB                          _MMIO(0xb18c)
 #define   SCRUB_RATE_PER_BANK_MASK             REG_GENMASK(2, 0)
 #define   SCRUB_RATE_4B_PER_CLK                        REG_FIELD_PREP(SCRUB_RATE_PER_BANK_MASK, 0x6)
 
-#define L3SQCREG1_CCS0                         _MMIO(0xb200)
+#define L3SQCREG1_CCS0                         MCR_REG(0xb200)
 #define   FLUSHALLNONCOH                       REG_BIT(5)
 
 #define GEN11_GLBLINVL                         _MMIO(0xb404)
 #define GEN9_BLT_MOCS(i)                       _MMIO(__GEN9_BCS0_MOCS0 + (i) * 4)
 
 #define GEN12_FAULT_TLB_DATA0                  _MMIO(0xceb8)
+#define XEHP_FAULT_TLB_DATA0                   MCR_REG(0xceb8)
 #define GEN12_FAULT_TLB_DATA1                  _MMIO(0xcebc)
+#define XEHP_FAULT_TLB_DATA1                   MCR_REG(0xcebc)
 #define   FAULT_VA_HIGH_BITS                   (0xf << 0)
 #define   FAULT_GTT_SEL                                (1 << 4)
 
 #define GEN12_RING_FAULT_REG                   _MMIO(0xcec4)
+#define XEHP_RING_FAULT_REG                    MCR_REG(0xcec4)
 #define   GEN8_RING_FAULT_ENGINE_ID(x)         (((x) >> 12) & 0x7)
 #define   RING_FAULT_GTTSEL_MASK               (1 << 11)
 #define   RING_FAULT_SRCID(x)                  (((x) >> 3) & 0xff)
 #define   RING_FAULT_VALID                     (1 << 0)
 
 #define GEN12_GFX_TLB_INV_CR                   _MMIO(0xced8)
+#define XEHP_GFX_TLB_INV_CR                    MCR_REG(0xced8)
 #define GEN12_VD_TLB_INV_CR                    _MMIO(0xcedc)
+#define XEHP_VD_TLB_INV_CR                     MCR_REG(0xcedc)
 #define GEN12_VE_TLB_INV_CR                    _MMIO(0xcee0)
+#define XEHP_VE_TLB_INV_CR                     MCR_REG(0xcee0)
 #define GEN12_BLT_TLB_INV_CR                   _MMIO(0xcee4)
+#define XEHP_BLT_TLB_INV_CR                    MCR_REG(0xcee4)
 #define GEN12_COMPCTX_TLB_INV_CR               _MMIO(0xcf04)
+#define XEHP_COMPCTX_TLB_INV_CR                        MCR_REG(0xcf04)
 
-#define GEN12_MERT_MOD_CTRL                    _MMIO(0xcf28)
-#define RENDER_MOD_CTRL                                _MMIO(0xcf2c)
-#define COMP_MOD_CTRL                          _MMIO(0xcf30)
-#define VDBX_MOD_CTRL                          _MMIO(0xcf34)
-#define VEBX_MOD_CTRL                          _MMIO(0xcf38)
+#define XEHP_MERT_MOD_CTRL                     MCR_REG(0xcf28)
+#define RENDER_MOD_CTRL                                MCR_REG(0xcf2c)
+#define COMP_MOD_CTRL                          MCR_REG(0xcf30)
+#define VDBX_MOD_CTRL                          MCR_REG(0xcf34)
+#define VEBX_MOD_CTRL                          MCR_REG(0xcf38)
 #define   FORCE_MISS_FTLB                      REG_BIT(3)
 
 #define GEN12_GAMSTLB_CTRL                     _MMIO(0xcf4c)
 #define GEN12_GAM_DONE                         _MMIO(0xcf68)
 
 #define GEN7_HALF_SLICE_CHICKEN1               _MMIO(0xe100) /* IVB GT1 + VLV */
+#define GEN8_HALF_SLICE_CHICKEN1               MCR_REG(0xe100)
 #define   GEN7_MAX_PS_THREAD_DEP               (8 << 12)
 #define   GEN7_SINGLE_SUBSCAN_DISPATCH_ENABLE  (1 << 10)
 #define   GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE      (1 << 4)
 #define   GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE (1 << 3)
 
 #define GEN7_SAMPLER_INSTDONE                  _MMIO(0xe160)
+#define GEN8_SAMPLER_INSTDONE                  MCR_REG(0xe160)
 #define GEN7_ROW_INSTDONE                      _MMIO(0xe164)
+#define GEN8_ROW_INSTDONE                      MCR_REG(0xe164)
 
-#define HALF_SLICE_CHICKEN2                    _MMIO(0xe180)
+#define HALF_SLICE_CHICKEN2                    MCR_REG(0xe180)
 #define   GEN8_ST_PO_DISABLE                   (1 << 13)
 
-#define HALF_SLICE_CHICKEN3                    _MMIO(0xe184)
+#define HSW_HALF_SLICE_CHICKEN3                        _MMIO(0xe184)
+#define GEN8_HALF_SLICE_CHICKEN3               MCR_REG(0xe184)
 #define   HSW_SAMPLE_C_PERFORMANCE             (1 << 9)
 #define   GEN8_CENTROID_PIXEL_OPT_DIS          (1 << 8)
 #define   GEN9_DISABLE_OCL_OOB_SUPPRESS_LOGIC  (1 << 5)
 #define   GEN8_SAMPLER_POWER_BYPASS_DIS                (1 << 1)
 
-#define GEN9_HALF_SLICE_CHICKEN5               _MMIO(0xe188)
+#define GEN9_HALF_SLICE_CHICKEN5               MCR_REG(0xe188)
 #define   GEN9_DG_MIRROR_FIX_ENABLE            (1 << 5)
 #define   GEN9_CCS_TLB_PREFETCH_ENABLE         (1 << 3)
 
-#define GEN10_SAMPLER_MODE                     _MMIO(0xe18c)
+#define GEN10_SAMPLER_MODE                     MCR_REG(0xe18c)
 #define   ENABLE_SMALLPL                       REG_BIT(15)
 #define   SC_DISABLE_POWER_OPTIMIZATION_EBB    REG_BIT(9)
 #define   GEN11_SAMPLER_ENABLE_HEADLESS_MSG    REG_BIT(5)
 
-#define GEN9_HALF_SLICE_CHICKEN7               _MMIO(0xe194)
+#define GEN9_HALF_SLICE_CHICKEN7               MCR_REG(0xe194)
 #define   DG2_DISABLE_ROUND_ENABLE_ALLOW_FOR_SSLA      REG_BIT(15)
 #define   GEN9_SAMPLER_HASH_COMPRESSED_READ_ADDR       REG_BIT(8)
 #define   GEN9_ENABLE_YV12_BUGFIX              REG_BIT(4)
 #define   GEN9_ENABLE_GPGPU_PREEMPTION         REG_BIT(2)
 
-#define GEN10_CACHE_MODE_SS                    _MMIO(0xe420)
+#define GEN10_CACHE_MODE_SS                    MCR_REG(0xe420)
 #define   ENABLE_EU_COUNT_FOR_TDL_FLUSH                REG_BIT(10)
 #define   DISABLE_ECC                          REG_BIT(5)
 #define   FLOAT_BLEND_OPTIMIZATION_ENABLE      REG_BIT(4)
 #define   ENABLE_PREFETCH_INTO_IC              REG_BIT(3)
 
-#define EU_PERF_CNTL0                          _MMIO(0xe458)
-#define EU_PERF_CNTL4                          _MMIO(0xe45c)
+#define EU_PERF_CNTL0                          PERF_REG(0xe458)
+#define EU_PERF_CNTL4                          PERF_REG(0xe45c)
 
-#define GEN9_ROW_CHICKEN4                      _MMIO(0xe48c)
+#define GEN9_ROW_CHICKEN4                      MCR_REG(0xe48c)
 #define   GEN12_DISABLE_GRF_CLEAR              REG_BIT(13)
 #define   XEHP_DIS_BBL_SYSPIPE                 REG_BIT(11)
 #define   GEN12_DISABLE_TDL_PUSH               REG_BIT(9)
 #define HSW_ROW_CHICKEN3                       _MMIO(0xe49c)
 #define   HSW_ROW_CHICKEN3_L3_GLOBAL_ATOMICS_DISABLE   (1 << 6)
 
-#define GEN8_ROW_CHICKEN                       _MMIO(0xe4f0)
+#define GEN8_ROW_CHICKEN                       MCR_REG(0xe4f0)
 #define   FLOW_CONTROL_ENABLE                  REG_BIT(15)
 #define   UGM_BACKUP_MODE                      REG_BIT(13)
 #define   MDQ_ARBITRATION_MODE                 REG_BIT(12)
 #define   DISABLE_EARLY_EOT                    REG_BIT(1)
 
 #define GEN7_ROW_CHICKEN2                      _MMIO(0xe4f4)
+
+#define GEN8_ROW_CHICKEN2                      MCR_REG(0xe4f4)
 #define   GEN12_DISABLE_READ_SUPPRESSION       REG_BIT(15)
 #define   GEN12_DISABLE_EARLY_READ             REG_BIT(14)
 #define   GEN12_ENABLE_LARGE_GRF_MODE          REG_BIT(12)
 #define   GEN12_PUSH_CONST_DEREF_HOLD_DIS      REG_BIT(8)
+#define   GEN12_DISABLE_DOP_GATING              REG_BIT(0)
 
-#define RT_CTRL                                        _MMIO(0xe530)
+#define RT_CTRL                                        MCR_REG(0xe530)
 #define   DIS_NULL_QUERY                       REG_BIT(10)
 #define   STACKID_CTRL                         REG_GENMASK(6, 5)
 #define   STACKID_CTRL_512                     REG_FIELD_PREP(STACKID_CTRL, 0x2)
 
-#define EU_PERF_CNTL1                          _MMIO(0xe558)
-#define EU_PERF_CNTL5                          _MMIO(0xe55c)
+#define EU_PERF_CNTL1                          PERF_REG(0xe558)
+#define EU_PERF_CNTL5                          PERF_REG(0xe55c)
 
-#define GEN12_HDC_CHICKEN0                     _MMIO(0xe5f0)
+#define XEHP_HDC_CHICKEN0                      MCR_REG(0xe5f0)
 #define   LSC_L1_FLUSH_CTL_3D_DATAPORT_FLUSH_EVENTS_MASK       REG_GENMASK(13, 11)
-#define ICL_HDC_MODE                           _MMIO(0xe5f4)
+#define ICL_HDC_MODE                           MCR_REG(0xe5f4)
 
-#define EU_PERF_CNTL2                          _MMIO(0xe658)
-#define EU_PERF_CNTL6                          _MMIO(0xe65c)
-#define EU_PERF_CNTL3                          _MMIO(0xe758)
+#define EU_PERF_CNTL2                          PERF_REG(0xe658)
+#define EU_PERF_CNTL6                          PERF_REG(0xe65c)
+#define EU_PERF_CNTL3                          PERF_REG(0xe758)
 
-#define LSC_CHICKEN_BIT_0                      _MMIO(0xe7c8)
+#define LSC_CHICKEN_BIT_0                      MCR_REG(0xe7c8)
 #define   DISABLE_D8_D16_COASLESCE             REG_BIT(30)
 #define   FORCE_1_SUB_MESSAGE_PER_FRAGMENT     REG_BIT(15)
-#define LSC_CHICKEN_BIT_0_UDW                  _MMIO(0xe7c8 + 4)
+#define LSC_CHICKEN_BIT_0_UDW                  MCR_REG(0xe7c8 + 4)
 #define   DIS_CHAIN_2XSIMD8                    REG_BIT(55 - 32)
 #define   FORCE_SLM_FENCE_SCOPE_TO_TILE                REG_BIT(42 - 32)
 #define   FORCE_UGM_FENCE_SCOPE_TO_TILE                REG_BIT(41 - 32)
 #define   MAXREQS_PER_BANK                     REG_GENMASK(39 - 32, 37 - 32)
 #define   DISABLE_128B_EVICTION_COMMAND_UDW    REG_BIT(36 - 32)
 
-#define SARB_CHICKEN1                          _MMIO(0xe90c)
+#define SARB_CHICKEN1                          MCR_REG(0xe90c)
 #define   COMP_CKN_IN                          REG_GENMASK(30, 29)
 
-#define GEN7_HALF_SLICE_CHICKEN1_GT2           _MMIO(0xf100)
-
 #define GEN7_ROW_CHICKEN2_GT2                  _MMIO(0xf4f4)
 #define   DOP_CLOCK_GATING_DISABLE             (1 << 0)
 #define   PUSH_CONSTANT_DEREF_DISABLE          (1 << 8)
 #define VLV_RENDER_C0_COUNT                    _MMIO(0x138118)
 #define VLV_MEDIA_C0_COUNT                     _MMIO(0x13811c)
 
+#define GEN12_RPSTAT1                          _MMIO(0x1381b4)
+#define   GEN12_VOLTAGE_MASK                   REG_GENMASK(10, 0)
+
 #define GEN11_GT_INTR_DW(x)                    _MMIO(0x190018 + ((x) * 4))
 #define   GEN11_CSME                           (31)
 #define   GEN11_GUNIT                          (28)
 
 #define GEN12_SFC_DONE(n)                      _MMIO(0x1cc000 + (n) * 0x1000)
 
+#define GT0_PACKAGE_ENERGY_STATUS              _MMIO(0x250004)
+#define GT0_PACKAGE_RAPL_LIMIT                 _MMIO(0x250008)
+#define GT0_PACKAGE_POWER_SKU_UNIT             _MMIO(0x250068)
+#define GT0_PLATFORM_ENERGY_STATUS             _MMIO(0x25006c)
+
 /*
  * Standalone Media's non-engine GT registers are located at their regular GT
  * offsets plus 0x380000.  This extra offset is stored inside the intel_uncore
index d651ccd0ab20b0cebcac0484dbcf06d07fe7e452..9486dd3bed991231c48d7dfa367c4f4490892868 100644 (file)
@@ -22,11 +22,9 @@ bool is_object_gt(struct kobject *kobj)
        return !strncmp(kobj->name, "gt", 2);
 }
 
-struct intel_gt *intel_gt_sysfs_get_drvdata(struct device *dev,
+struct intel_gt *intel_gt_sysfs_get_drvdata(struct kobject *kobj,
                                            const char *name)
 {
-       struct kobject *kobj = &dev->kobj;
-
        /*
         * We are interested in knowing from where the interface
         * has been called, whether it's called from gt/ or from
@@ -38,6 +36,7 @@ struct intel_gt *intel_gt_sysfs_get_drvdata(struct device *dev,
         * "struct drm_i915_private *" type.
         */
        if (!is_object_gt(kobj)) {
+               struct device *dev = kobj_to_dev(kobj);
                struct drm_i915_private *i915 = kdev_minor_to_i915(dev);
 
                return to_gt(i915);
@@ -51,18 +50,18 @@ static struct kobject *gt_get_parent_obj(struct intel_gt *gt)
        return &gt->i915->drm.primary->kdev->kobj;
 }
 
-static ssize_t id_show(struct device *dev,
-                      struct device_attribute *attr,
+static ssize_t id_show(struct kobject *kobj,
+                      struct kobj_attribute *attr,
                       char *buf)
 {
-       struct intel_gt *gt = intel_gt_sysfs_get_drvdata(dev, attr->attr.name);
+       struct intel_gt *gt = intel_gt_sysfs_get_drvdata(kobj, attr->attr.name);
 
        return sysfs_emit(buf, "%u\n", gt->info.id);
 }
-static DEVICE_ATTR_RO(id);
+static struct kobj_attribute attr_id = __ATTR_RO(id);
 
 static struct attribute *id_attrs[] = {
-       &dev_attr_id.attr,
+       &attr_id.attr,
        NULL,
 };
 ATTRIBUTE_GROUPS(id);
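For readers less familiar with the sysfs machinery: the conversion from DEVICE_ATTR_RO(id) to a kobj_attribute changes only the wrapping structure and the show() prototype. As an illustration (based on the generic __ATTR_RO() helper from <linux/sysfs.h>, not part of this patch), the new declaration resolves to roughly:

static struct kobj_attribute attr_id = {
	.attr = { .name = "id", .mode = 0444 },
	.show = id_show,	/* now (struct kobject *, struct kobj_attribute *, char *) */
};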
index 6232923a420d029f8ab15d9a90b79553eac1a6bf..18bab835be02ea6c0d4fd45a0421b19aae9609ee 100644 (file)
@@ -18,11 +18,6 @@ bool is_object_gt(struct kobject *kobj);
 
 struct drm_i915_private *kobj_to_i915(struct kobject *kobj);
 
-struct kobject *
-intel_gt_create_kobj(struct intel_gt *gt,
-                    struct kobject *dir,
-                    const char *name);
-
 static inline struct intel_gt *kobj_to_gt(struct kobject *kobj)
 {
        return container_of(kobj, struct intel_gt, sysfs_gt);
@@ -30,7 +25,7 @@ static inline struct intel_gt *kobj_to_gt(struct kobject *kobj)
 
 void intel_gt_sysfs_register(struct intel_gt *gt);
 void intel_gt_sysfs_unregister(struct intel_gt *gt);
-struct intel_gt *intel_gt_sysfs_get_drvdata(struct device *dev,
+struct intel_gt *intel_gt_sysfs_get_drvdata(struct kobject *kobj,
                                            const char *name);
 
 #endif /* SYSFS_GT_H */
index 180dd6f3ef57148c56d0adb1fcfa398c3162e129..2b5f05b3118712f9b8f898dc3556682bab56bf81 100644 (file)
@@ -24,14 +24,15 @@ enum intel_gt_sysfs_op {
 };
 
 static int
-sysfs_gt_attribute_w_func(struct device *dev, struct device_attribute *attr,
+sysfs_gt_attribute_w_func(struct kobject *kobj, struct attribute *attr,
                          int (func)(struct intel_gt *gt, u32 val), u32 val)
 {
        struct intel_gt *gt;
        int ret;
 
-       if (!is_object_gt(&dev->kobj)) {
+       if (!is_object_gt(kobj)) {
                int i;
+               struct device *dev = kobj_to_dev(kobj);
                struct drm_i915_private *i915 = kdev_minor_to_i915(dev);
 
                for_each_gt(gt, i915, i) {
@@ -40,7 +41,7 @@ sysfs_gt_attribute_w_func(struct device *dev, struct device_attribute *attr,
                                break;
                }
        } else {
-               gt = intel_gt_sysfs_get_drvdata(dev, attr->attr.name);
+               gt = intel_gt_sysfs_get_drvdata(kobj, attr->name);
                ret = func(gt, val);
        }
 
@@ -48,7 +49,7 @@ sysfs_gt_attribute_w_func(struct device *dev, struct device_attribute *attr,
 }
 
 static u32
-sysfs_gt_attribute_r_func(struct device *dev, struct device_attribute *attr,
+sysfs_gt_attribute_r_func(struct kobject *kobj, struct attribute *attr,
                          u32 (func)(struct intel_gt *gt),
                          enum intel_gt_sysfs_op op)
 {
@@ -57,8 +58,9 @@ sysfs_gt_attribute_r_func(struct device *dev, struct device_attribute *attr,
 
        ret = (op == INTEL_GT_SYSFS_MAX) ? 0 : (u32) -1;
 
-       if (!is_object_gt(&dev->kobj)) {
+       if (!is_object_gt(kobj)) {
                int i;
+               struct device *dev = kobj_to_dev(kobj);
                struct drm_i915_private *i915 = kdev_minor_to_i915(dev);
 
                for_each_gt(gt, i915, i) {
@@ -77,7 +79,7 @@ sysfs_gt_attribute_r_func(struct device *dev, struct device_attribute *attr,
                        }
                }
        } else {
-               gt = intel_gt_sysfs_get_drvdata(dev, attr->attr.name);
+               gt = intel_gt_sysfs_get_drvdata(kobj, attr->name);
                ret = func(gt);
        }
 
@@ -92,6 +94,76 @@ sysfs_gt_attribute_r_func(struct device *dev, struct device_attribute *attr,
 #define sysfs_gt_attribute_r_max_func(d, a, f) \
                sysfs_gt_attribute_r_func(d, a, f, INTEL_GT_SYSFS_MAX)
 
+#define INTEL_GT_SYSFS_SHOW(_name, _attr_type)                                                 \
+       static ssize_t _name##_show_common(struct kobject *kobj,                                \
+                                          struct attribute *attr, char *buff)                  \
+       {                                                                                       \
+               u32 val = sysfs_gt_attribute_r_##_attr_type##_func(kobj, attr,                  \
+                                                                  __##_name##_show);           \
+                                                                                               \
+               return sysfs_emit(buff, "%u\n", val);                                           \
+       }                                                                                       \
+       static ssize_t _name##_show(struct kobject *kobj,                                       \
+                                   struct kobj_attribute *attr, char *buff)                    \
+       {                                                                                       \
+               return _name ##_show_common(kobj, &attr->attr, buff);                           \
+       }                                                                                       \
+       static ssize_t _name##_dev_show(struct device *dev,                                     \
+                                       struct device_attribute *attr, char *buff)              \
+       {                                                                                       \
+               return _name##_show_common(&dev->kobj, &attr->attr, buff);                      \
+       }
+
+#define INTEL_GT_SYSFS_STORE(_name, _func)                                             \
+       static ssize_t _name##_store_common(struct kobject *kobj,                       \
+                                           struct attribute *attr,                     \
+                                           const char *buff, size_t count)             \
+       {                                                                               \
+               int ret;                                                                \
+               u32 val;                                                                \
+                                                                                       \
+               ret = kstrtou32(buff, 0, &val);                                         \
+               if (ret)                                                                \
+                       return ret;                                                     \
+                                                                                       \
+               ret = sysfs_gt_attribute_w_func(kobj, attr, _func, val);                \
+                                                                                       \
+               return ret ?: count;                                                    \
+       }                                                                               \
+       static ssize_t _name##_store(struct kobject *kobj,                              \
+                                    struct kobj_attribute *attr, const char *buff,     \
+                                    size_t count)                                      \
+       {                                                                               \
+               return _name##_store_common(kobj, &attr->attr, buff, count);            \
+       }                                                                               \
+       static ssize_t _name##_dev_store(struct device *dev,                            \
+                                        struct device_attribute *attr,                 \
+                                        const char *buff, size_t count)                \
+       {                                                                               \
+               return _name##_store_common(&dev->kobj, &attr->attr, buff, count);      \
+       }
+
+#define INTEL_GT_SYSFS_SHOW_MAX(_name) INTEL_GT_SYSFS_SHOW(_name, max)
+#define INTEL_GT_SYSFS_SHOW_MIN(_name) INTEL_GT_SYSFS_SHOW(_name, min)
+
+#define INTEL_GT_ATTR_RW(_name) \
+       static struct kobj_attribute attr_##_name = __ATTR_RW(_name)
+
+#define INTEL_GT_ATTR_RO(_name) \
+       static struct kobj_attribute attr_##_name = __ATTR_RO(_name)
+
+#define INTEL_GT_DUAL_ATTR_RW(_name) \
+       static struct device_attribute dev_attr_##_name = __ATTR(_name, 0644,           \
+                                                                _name##_dev_show,      \
+                                                                _name##_dev_store);    \
+       INTEL_GT_ATTR_RW(_name)
+
+#define INTEL_GT_DUAL_ATTR_RO(_name) \
+       static struct device_attribute dev_attr_##_name = __ATTR(_name, 0444,           \
+                                                                _name##_dev_show,      \
+                                                                NULL);                 \
+       INTEL_GT_ATTR_RO(_name)
+
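To make the macro layering above concrete, an invocation pair such as INTEL_GT_SYSFS_SHOW_MIN(rc6_residency_ms) plus INTEL_GT_DUAL_ATTR_RO(rc6_residency_ms) — both used further down in this file — expands to approximately the following (whitespace tidied, illustration only):

static ssize_t rc6_residency_ms_show_common(struct kobject *kobj,
					    struct attribute *attr, char *buff)
{
	u32 val = sysfs_gt_attribute_r_min_func(kobj, attr,
						__rc6_residency_ms_show);

	return sysfs_emit(buff, "%u\n", val);
}

static ssize_t rc6_residency_ms_show(struct kobject *kobj,
				     struct kobj_attribute *attr, char *buff)
{
	return rc6_residency_ms_show_common(kobj, &attr->attr, buff);
}

static ssize_t rc6_residency_ms_dev_show(struct device *dev,
					 struct device_attribute *attr, char *buff)
{
	return rc6_residency_ms_show_common(&dev->kobj, &attr->attr, buff);
}

static struct device_attribute dev_attr_rc6_residency_ms =
	__ATTR(rc6_residency_ms, 0444, rc6_residency_ms_dev_show, NULL);
static struct kobj_attribute attr_rc6_residency_ms = __ATTR_RO(rc6_residency_ms);

The *_show variant backs the per-gt kobject files, while the *_dev_show variant keeps the legacy device-level files working.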
 #ifdef CONFIG_PM
 static u32 get_residency(struct intel_gt *gt, i915_reg_t reg)
 {
@@ -104,11 +176,8 @@ static u32 get_residency(struct intel_gt *gt, i915_reg_t reg)
        return DIV_ROUND_CLOSEST_ULL(res, 1000);
 }
 
-static ssize_t rc6_enable_show(struct device *dev,
-                              struct device_attribute *attr,
-                              char *buff)
+static u8 get_rc6_mask(struct intel_gt *gt)
 {
-       struct intel_gt *gt = intel_gt_sysfs_get_drvdata(dev, attr->attr.name);
        u8 mask = 0;
 
        if (HAS_RC6(gt->i915))
@@ -118,37 +187,35 @@ static ssize_t rc6_enable_show(struct device *dev,
        if (HAS_RC6pp(gt->i915))
                mask |= BIT(2);
 
-       return sysfs_emit(buff, "%x\n", mask);
+       return mask;
 }
 
-static u32 __rc6_residency_ms_show(struct intel_gt *gt)
+static ssize_t rc6_enable_show(struct kobject *kobj,
+                              struct kobj_attribute *attr,
+                              char *buff)
 {
-       return get_residency(gt, GEN6_GT_GFX_RC6);
+       struct intel_gt *gt = intel_gt_sysfs_get_drvdata(kobj, attr->attr.name);
+
+       return sysfs_emit(buff, "%x\n", get_rc6_mask(gt));
 }
 
-static ssize_t rc6_residency_ms_show(struct device *dev,
-                                    struct device_attribute *attr,
-                                    char *buff)
+static ssize_t rc6_enable_dev_show(struct device *dev,
+                                  struct device_attribute *attr,
+                                  char *buff)
 {
-       u32 rc6_residency = sysfs_gt_attribute_r_min_func(dev, attr,
-                                                     __rc6_residency_ms_show);
+       struct intel_gt *gt = intel_gt_sysfs_get_drvdata(&dev->kobj, attr->attr.name);
 
-       return sysfs_emit(buff, "%u\n", rc6_residency);
+       return sysfs_emit(buff, "%x\n", get_rc6_mask(gt));
 }
 
-static u32 __rc6p_residency_ms_show(struct intel_gt *gt)
+static u32 __rc6_residency_ms_show(struct intel_gt *gt)
 {
-       return get_residency(gt, GEN6_GT_GFX_RC6p);
+       return get_residency(gt, GEN6_GT_GFX_RC6);
 }
 
-static ssize_t rc6p_residency_ms_show(struct device *dev,
-                                     struct device_attribute *attr,
-                                     char *buff)
+static u32 __rc6p_residency_ms_show(struct intel_gt *gt)
 {
-       u32 rc6p_residency = sysfs_gt_attribute_r_min_func(dev, attr,
-                                               __rc6p_residency_ms_show);
-
-       return sysfs_emit(buff, "%u\n", rc6p_residency);
+       return get_residency(gt, GEN6_GT_GFX_RC6p);
 }
 
 static u32 __rc6pp_residency_ms_show(struct intel_gt *gt)
@@ -156,67 +223,69 @@ static u32 __rc6pp_residency_ms_show(struct intel_gt *gt)
        return get_residency(gt, GEN6_GT_GFX_RC6pp);
 }
 
-static ssize_t rc6pp_residency_ms_show(struct device *dev,
-                                      struct device_attribute *attr,
-                                      char *buff)
-{
-       u32 rc6pp_residency = sysfs_gt_attribute_r_min_func(dev, attr,
-                                               __rc6pp_residency_ms_show);
-
-       return sysfs_emit(buff, "%u\n", rc6pp_residency);
-}
-
 static u32 __media_rc6_residency_ms_show(struct intel_gt *gt)
 {
        return get_residency(gt, VLV_GT_MEDIA_RC6);
 }
 
-static ssize_t media_rc6_residency_ms_show(struct device *dev,
-                                          struct device_attribute *attr,
-                                          char *buff)
-{
-       u32 rc6_residency = sysfs_gt_attribute_r_min_func(dev, attr,
-                                               __media_rc6_residency_ms_show);
+INTEL_GT_SYSFS_SHOW_MIN(rc6_residency_ms);
+INTEL_GT_SYSFS_SHOW_MIN(rc6p_residency_ms);
+INTEL_GT_SYSFS_SHOW_MIN(rc6pp_residency_ms);
+INTEL_GT_SYSFS_SHOW_MIN(media_rc6_residency_ms);
 
-       return sysfs_emit(buff, "%u\n", rc6_residency);
-}
-
-static DEVICE_ATTR_RO(rc6_enable);
-static DEVICE_ATTR_RO(rc6_residency_ms);
-static DEVICE_ATTR_RO(rc6p_residency_ms);
-static DEVICE_ATTR_RO(rc6pp_residency_ms);
-static DEVICE_ATTR_RO(media_rc6_residency_ms);
+INTEL_GT_DUAL_ATTR_RO(rc6_enable);
+INTEL_GT_DUAL_ATTR_RO(rc6_residency_ms);
+INTEL_GT_DUAL_ATTR_RO(rc6p_residency_ms);
+INTEL_GT_DUAL_ATTR_RO(rc6pp_residency_ms);
+INTEL_GT_DUAL_ATTR_RO(media_rc6_residency_ms);
 
 static struct attribute *rc6_attrs[] = {
+       &attr_rc6_enable.attr,
+       &attr_rc6_residency_ms.attr,
+       NULL
+};
+
+static struct attribute *rc6p_attrs[] = {
+       &attr_rc6p_residency_ms.attr,
+       &attr_rc6pp_residency_ms.attr,
+       NULL
+};
+
+static struct attribute *media_rc6_attrs[] = {
+       &attr_media_rc6_residency_ms.attr,
+       NULL
+};
+
+static struct attribute *rc6_dev_attrs[] = {
        &dev_attr_rc6_enable.attr,
        &dev_attr_rc6_residency_ms.attr,
        NULL
 };
 
-static struct attribute *rc6p_attrs[] = {
+static struct attribute *rc6p_dev_attrs[] = {
        &dev_attr_rc6p_residency_ms.attr,
        &dev_attr_rc6pp_residency_ms.attr,
        NULL
 };
 
-static struct attribute *media_rc6_attrs[] = {
+static struct attribute *media_rc6_dev_attrs[] = {
        &dev_attr_media_rc6_residency_ms.attr,
        NULL
 };
 
 static const struct attribute_group rc6_attr_group[] = {
        { .attrs = rc6_attrs, },
-       { .name = power_group_name, .attrs = rc6_attrs, },
+       { .name = power_group_name, .attrs = rc6_dev_attrs, },
 };
 
 static const struct attribute_group rc6p_attr_group[] = {
        { .attrs = rc6p_attrs, },
-       { .name = power_group_name, .attrs = rc6p_attrs, },
+       { .name = power_group_name, .attrs = rc6p_dev_attrs, },
 };
 
 static const struct attribute_group media_rc6_attr_group[] = {
        { .attrs = media_rc6_attrs, },
-       { .name = power_group_name, .attrs = media_rc6_attrs, },
+       { .name = power_group_name, .attrs = media_rc6_dev_attrs, },
 };
 
 static int __intel_gt_sysfs_create_group(struct kobject *kobj,
@@ -271,104 +340,34 @@ static u32 __act_freq_mhz_show(struct intel_gt *gt)
        return intel_rps_read_actual_frequency(&gt->rps);
 }
 
-static ssize_t act_freq_mhz_show(struct device *dev,
-                                struct device_attribute *attr, char *buff)
-{
-       u32 actual_freq = sysfs_gt_attribute_r_max_func(dev, attr,
-                                                   __act_freq_mhz_show);
-
-       return sysfs_emit(buff, "%u\n", actual_freq);
-}
-
 static u32 __cur_freq_mhz_show(struct intel_gt *gt)
 {
        return intel_rps_get_requested_frequency(&gt->rps);
 }
 
-static ssize_t cur_freq_mhz_show(struct device *dev,
-                                struct device_attribute *attr, char *buff)
-{
-       u32 cur_freq = sysfs_gt_attribute_r_max_func(dev, attr,
-                                                __cur_freq_mhz_show);
-
-       return sysfs_emit(buff, "%u\n", cur_freq);
-}
-
 static u32 __boost_freq_mhz_show(struct intel_gt *gt)
 {
        return intel_rps_get_boost_frequency(&gt->rps);
 }
 
-static ssize_t boost_freq_mhz_show(struct device *dev,
-                                  struct device_attribute *attr,
-                                  char *buff)
-{
-       u32 boost_freq = sysfs_gt_attribute_r_max_func(dev, attr,
-                                                  __boost_freq_mhz_show);
-
-       return sysfs_emit(buff, "%u\n", boost_freq);
-}
-
 static int __boost_freq_mhz_store(struct intel_gt *gt, u32 val)
 {
        return intel_rps_set_boost_frequency(&gt->rps, val);
 }
 
-static ssize_t boost_freq_mhz_store(struct device *dev,
-                                   struct device_attribute *attr,
-                                   const char *buff, size_t count)
-{
-       ssize_t ret;
-       u32 val;
-
-       ret = kstrtou32(buff, 0, &val);
-       if (ret)
-               return ret;
-
-       return sysfs_gt_attribute_w_func(dev, attr,
-                                        __boost_freq_mhz_store, val) ?: count;
-}
-
-static u32 __rp0_freq_mhz_show(struct intel_gt *gt)
+static u32 __RP0_freq_mhz_show(struct intel_gt *gt)
 {
        return intel_rps_get_rp0_frequency(&gt->rps);
 }
 
-static ssize_t RP0_freq_mhz_show(struct device *dev,
-                                struct device_attribute *attr, char *buff)
-{
-       u32 rp0_freq = sysfs_gt_attribute_r_max_func(dev, attr,
-                                                    __rp0_freq_mhz_show);
-
-       return sysfs_emit(buff, "%u\n", rp0_freq);
-}
-
-static u32 __rp1_freq_mhz_show(struct intel_gt *gt)
-{
-       return intel_rps_get_rp1_frequency(&gt->rps);
-}
-
-static ssize_t RP1_freq_mhz_show(struct device *dev,
-                                struct device_attribute *attr, char *buff)
-{
-       u32 rp1_freq = sysfs_gt_attribute_r_max_func(dev, attr,
-                                                    __rp1_freq_mhz_show);
-
-       return sysfs_emit(buff, "%u\n", rp1_freq);
-}
-
-static u32 __rpn_freq_mhz_show(struct intel_gt *gt)
+static u32 __RPn_freq_mhz_show(struct intel_gt *gt)
 {
        return intel_rps_get_rpn_frequency(&gt->rps);
 }
 
-static ssize_t RPn_freq_mhz_show(struct device *dev,
-                                struct device_attribute *attr, char *buff)
+static u32 __RP1_freq_mhz_show(struct intel_gt *gt)
 {
-       u32 rpn_freq = sysfs_gt_attribute_r_max_func(dev, attr,
-                                                    __rpn_freq_mhz_show);
-
-       return sysfs_emit(buff, "%u\n", rpn_freq);
+       return intel_rps_get_rp1_frequency(&gt->rps);
 }
 
 static u32 __max_freq_mhz_show(struct intel_gt *gt)
@@ -376,71 +375,21 @@ static u32 __max_freq_mhz_show(struct intel_gt *gt)
        return intel_rps_get_max_frequency(&gt->rps);
 }
 
-static ssize_t max_freq_mhz_show(struct device *dev,
-                                struct device_attribute *attr, char *buff)
-{
-       u32 max_freq = sysfs_gt_attribute_r_max_func(dev, attr,
-                                                    __max_freq_mhz_show);
-
-       return sysfs_emit(buff, "%u\n", max_freq);
-}
-
 static int __set_max_freq(struct intel_gt *gt, u32 val)
 {
        return intel_rps_set_max_frequency(&gt->rps, val);
 }
 
-static ssize_t max_freq_mhz_store(struct device *dev,
-                                 struct device_attribute *attr,
-                                 const char *buff, size_t count)
-{
-       int ret;
-       u32 val;
-
-       ret = kstrtou32(buff, 0, &val);
-       if (ret)
-               return ret;
-
-       ret = sysfs_gt_attribute_w_func(dev, attr, __set_max_freq, val);
-
-       return ret ?: count;
-}
-
 static u32 __min_freq_mhz_show(struct intel_gt *gt)
 {
        return intel_rps_get_min_frequency(&gt->rps);
 }
 
-static ssize_t min_freq_mhz_show(struct device *dev,
-                                struct device_attribute *attr, char *buff)
-{
-       u32 min_freq = sysfs_gt_attribute_r_min_func(dev, attr,
-                                                    __min_freq_mhz_show);
-
-       return sysfs_emit(buff, "%u\n", min_freq);
-}
-
 static int __set_min_freq(struct intel_gt *gt, u32 val)
 {
        return intel_rps_set_min_frequency(&gt->rps, val);
 }
 
-static ssize_t min_freq_mhz_store(struct device *dev,
-                                 struct device_attribute *attr,
-                                 const char *buff, size_t count)
-{
-       int ret;
-       u32 val;
-
-       ret = kstrtou32(buff, 0, &val);
-       if (ret)
-               return ret;
-
-       ret = sysfs_gt_attribute_w_func(dev, attr, __set_min_freq, val);
-
-       return ret ?: count;
-}
-
 static u32 __vlv_rpe_freq_mhz_show(struct intel_gt *gt)
 {
        struct intel_rps *rps = &gt->rps;
@@ -448,23 +397,31 @@ static u32 __vlv_rpe_freq_mhz_show(struct intel_gt *gt)
        return intel_gpu_freq(rps, rps->efficient_freq);
 }
 
-static ssize_t vlv_rpe_freq_mhz_show(struct device *dev,
-                                    struct device_attribute *attr, char *buff)
-{
-       u32 rpe_freq = sysfs_gt_attribute_r_max_func(dev, attr,
-                                                __vlv_rpe_freq_mhz_show);
-
-       return sysfs_emit(buff, "%u\n", rpe_freq);
-}
-
-#define INTEL_GT_RPS_SYSFS_ATTR(_name, _mode, _show, _store) \
-       static struct device_attribute dev_attr_gt_##_name = __ATTR(gt_##_name, _mode, _show, _store); \
-       static struct device_attribute dev_attr_rps_##_name = __ATTR(rps_##_name, _mode, _show, _store)
-
-#define INTEL_GT_RPS_SYSFS_ATTR_RO(_name)                              \
-               INTEL_GT_RPS_SYSFS_ATTR(_name, 0444, _name##_show, NULL)
-#define INTEL_GT_RPS_SYSFS_ATTR_RW(_name)                              \
-               INTEL_GT_RPS_SYSFS_ATTR(_name, 0644, _name##_show, _name##_store)
+INTEL_GT_SYSFS_SHOW_MAX(act_freq_mhz);
+INTEL_GT_SYSFS_SHOW_MAX(boost_freq_mhz);
+INTEL_GT_SYSFS_SHOW_MAX(cur_freq_mhz);
+INTEL_GT_SYSFS_SHOW_MAX(RP0_freq_mhz);
+INTEL_GT_SYSFS_SHOW_MAX(RP1_freq_mhz);
+INTEL_GT_SYSFS_SHOW_MAX(RPn_freq_mhz);
+INTEL_GT_SYSFS_SHOW_MAX(max_freq_mhz);
+INTEL_GT_SYSFS_SHOW_MIN(min_freq_mhz);
+INTEL_GT_SYSFS_SHOW_MAX(vlv_rpe_freq_mhz);
+INTEL_GT_SYSFS_STORE(boost_freq_mhz, __boost_freq_mhz_store);
+INTEL_GT_SYSFS_STORE(max_freq_mhz, __set_max_freq);
+INTEL_GT_SYSFS_STORE(min_freq_mhz, __set_min_freq);
+
+#define INTEL_GT_RPS_SYSFS_ATTR(_name, _mode, _show, _store, _show_dev, _store_dev)            \
+       static struct device_attribute dev_attr_gt_##_name = __ATTR(gt_##_name, _mode,          \
+                                                                   _show_dev, _store_dev);     \
+       static struct kobj_attribute attr_rps_##_name = __ATTR(rps_##_name, _mode,              \
+                                                              _show, _store)
+
+#define INTEL_GT_RPS_SYSFS_ATTR_RO(_name)                                              \
+               INTEL_GT_RPS_SYSFS_ATTR(_name, 0444, _name##_show, NULL,                \
+                                       _name##_dev_show, NULL)
+#define INTEL_GT_RPS_SYSFS_ATTR_RW(_name)                                              \
+               INTEL_GT_RPS_SYSFS_ATTR(_name, 0644, _name##_show, _name##_store,       \
+                                       _name##_dev_show, _name##_dev_store)
 
 /* The below macros generate static structures */
 INTEL_GT_RPS_SYSFS_ATTR_RO(act_freq_mhz);
@@ -475,32 +432,31 @@ INTEL_GT_RPS_SYSFS_ATTR_RO(RP1_freq_mhz);
 INTEL_GT_RPS_SYSFS_ATTR_RO(RPn_freq_mhz);
 INTEL_GT_RPS_SYSFS_ATTR_RW(max_freq_mhz);
 INTEL_GT_RPS_SYSFS_ATTR_RW(min_freq_mhz);
-
-static DEVICE_ATTR_RO(vlv_rpe_freq_mhz);
-
-#define GEN6_ATTR(s) { \
-               &dev_attr_##s##_act_freq_mhz.attr, \
-               &dev_attr_##s##_cur_freq_mhz.attr, \
-               &dev_attr_##s##_boost_freq_mhz.attr, \
-               &dev_attr_##s##_max_freq_mhz.attr, \
-               &dev_attr_##s##_min_freq_mhz.attr, \
-               &dev_attr_##s##_RP0_freq_mhz.attr, \
-               &dev_attr_##s##_RP1_freq_mhz.attr, \
-               &dev_attr_##s##_RPn_freq_mhz.attr, \
+INTEL_GT_RPS_SYSFS_ATTR_RO(vlv_rpe_freq_mhz);
+
+#define GEN6_ATTR(p, s) { \
+               &p##attr_##s##_act_freq_mhz.attr, \
+               &p##attr_##s##_cur_freq_mhz.attr, \
+               &p##attr_##s##_boost_freq_mhz.attr, \
+               &p##attr_##s##_max_freq_mhz.attr, \
+               &p##attr_##s##_min_freq_mhz.attr, \
+               &p##attr_##s##_RP0_freq_mhz.attr, \
+               &p##attr_##s##_RP1_freq_mhz.attr, \
+               &p##attr_##s##_RPn_freq_mhz.attr, \
                NULL, \
        }
 
-#define GEN6_RPS_ATTR GEN6_ATTR(rps)
-#define GEN6_GT_ATTR  GEN6_ATTR(gt)
+#define GEN6_RPS_ATTR GEN6_ATTR(, rps)
+#define GEN6_GT_ATTR  GEN6_ATTR(dev_, gt)
 
 static const struct attribute * const gen6_rps_attrs[] = GEN6_RPS_ATTR;
 static const struct attribute * const gen6_gt_attrs[]  = GEN6_GT_ATTR;
 
-static ssize_t punit_req_freq_mhz_show(struct device *dev,
-                                      struct device_attribute *attr,
+static ssize_t punit_req_freq_mhz_show(struct kobject *kobj,
+                                      struct kobj_attribute *attr,
                                       char *buff)
 {
-       struct intel_gt *gt = intel_gt_sysfs_get_drvdata(dev, attr->attr.name);
+       struct intel_gt *gt = intel_gt_sysfs_get_drvdata(kobj, attr->attr.name);
        u32 preq = intel_rps_read_punit_req_frequency(&gt->rps);
 
        return sysfs_emit(buff, "%u\n", preq);
@@ -508,20 +464,20 @@ static ssize_t punit_req_freq_mhz_show(struct device *dev,
 
 struct intel_gt_bool_throttle_attr {
        struct attribute attr;
-       ssize_t (*show)(struct device *dev, struct device_attribute *attr,
+       ssize_t (*show)(struct kobject *kobj, struct kobj_attribute *attr,
                        char *buf);
-       i915_reg_t reg32;
+       i915_reg_t (*reg32)(struct intel_gt *gt);
        u32 mask;
 };
 
-static ssize_t throttle_reason_bool_show(struct device *dev,
-                                        struct device_attribute *attr,
+static ssize_t throttle_reason_bool_show(struct kobject *kobj,
+                                        struct kobj_attribute *attr,
                                         char *buff)
 {
-       struct intel_gt *gt = intel_gt_sysfs_get_drvdata(dev, attr->attr.name);
+       struct intel_gt *gt = intel_gt_sysfs_get_drvdata(kobj, attr->attr.name);
        struct intel_gt_bool_throttle_attr *t_attr =
                                (struct intel_gt_bool_throttle_attr *) attr;
-       bool val = rps_read_mask_mmio(&gt->rps, t_attr->reg32, t_attr->mask);
+       bool val = rps_read_mask_mmio(&gt->rps, t_attr->reg32(gt), t_attr->mask);
 
        return sysfs_emit(buff, "%u\n", val);
 }
@@ -530,11 +486,11 @@ static ssize_t throttle_reason_bool_show(struct device *dev,
 struct intel_gt_bool_throttle_attr attr_##sysfs_func__ = { \
        .attr = { .name = __stringify(sysfs_func__), .mode = 0444 }, \
        .show = throttle_reason_bool_show, \
-       .reg32 = GT0_PERF_LIMIT_REASONS, \
+       .reg32 = intel_gt_perf_limit_reasons_reg, \
        .mask = mask__, \
 }
 
-static DEVICE_ATTR_RO(punit_req_freq_mhz);
+INTEL_GT_ATTR_RO(punit_req_freq_mhz);
 static INTEL_GT_RPS_BOOL_ATTR_RO(throttle_reason_status, GT0_PERF_LIMIT_REASONS_MASK);
 static INTEL_GT_RPS_BOOL_ATTR_RO(throttle_reason_pl1, POWER_LIMIT_1_MASK);
 static INTEL_GT_RPS_BOOL_ATTR_RO(throttle_reason_pl2, POWER_LIMIT_2_MASK);
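With reg32 now a function pointer (and the sysfs gating later in this file switching to i915_mmio_reg_valid()), the throttle attributes rely on a per-GT lookup helper. A plausible sketch of intel_gt_perf_limit_reasons_reg(), consistent with how it is used here — the actual helper is added in intel_gt.c, and the media register name below is an assumption:

/*
 * Sketch only: pick the PERF_LIMIT_REASONS register for this GT, returning an
 * invalid register where the feature does not exist (pre-Gen11) so callers can
 * gate on i915_mmio_reg_valid().
 */
i915_reg_t intel_gt_perf_limit_reasons_reg(struct intel_gt *gt)
{
	if (GRAPHICS_VER(gt->i915) < 11)
		return INVALID_MMIO_REG;

	/* standalone media GT uses its own instance (register name assumed) */
	if (gt->type == GT_MEDIA)
		return MTL_MEDIA_PERF_LIMIT_REASONS;

	return GT0_PERF_LIMIT_REASONS;
}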
@@ -597,8 +553,8 @@ static const struct attribute *throttle_reason_attrs[] = {
 #define U8_8_VAL_MASK           0xffff
 #define U8_8_SCALE_TO_VALUE     "0.00390625"
 
-static ssize_t freq_factor_scale_show(struct device *dev,
-                                     struct device_attribute *attr,
+static ssize_t freq_factor_scale_show(struct kobject *kobj,
+                                     struct kobj_attribute *attr,
                                      char *buff)
 {
        return sysfs_emit(buff, "%s\n", U8_8_SCALE_TO_VALUE);
@@ -610,11 +566,11 @@ static u32 media_ratio_mode_to_factor(u32 mode)
        return !mode ? mode : 256 / mode;
 }
 
-static ssize_t media_freq_factor_show(struct device *dev,
-                                     struct device_attribute *attr,
+static ssize_t media_freq_factor_show(struct kobject *kobj,
+                                     struct kobj_attribute *attr,
                                      char *buff)
 {
-       struct intel_gt *gt = intel_gt_sysfs_get_drvdata(dev, attr->attr.name);
+       struct intel_gt *gt = intel_gt_sysfs_get_drvdata(kobj, attr->attr.name);
        struct intel_guc_slpc *slpc = &gt->uc.guc.slpc;
        intel_wakeref_t wakeref;
        u32 mode;
@@ -641,11 +597,11 @@ static ssize_t media_freq_factor_show(struct device *dev,
        return sysfs_emit(buff, "%u\n", media_ratio_mode_to_factor(mode));
 }
 
-static ssize_t media_freq_factor_store(struct device *dev,
-                                      struct device_attribute *attr,
+static ssize_t media_freq_factor_store(struct kobject *kobj,
+                                      struct kobj_attribute *attr,
                                       const char *buff, size_t count)
 {
-       struct intel_gt *gt = intel_gt_sysfs_get_drvdata(dev, attr->attr.name);
+       struct intel_gt *gt = intel_gt_sysfs_get_drvdata(kobj, attr->attr.name);
        struct intel_guc_slpc *slpc = &gt->uc.guc.slpc;
        u32 factor, mode;
        int err;
@@ -670,11 +626,11 @@ static ssize_t media_freq_factor_store(struct device *dev,
        return err ?: count;
 }
 
-static ssize_t media_RP0_freq_mhz_show(struct device *dev,
-                                      struct device_attribute *attr,
+static ssize_t media_RP0_freq_mhz_show(struct kobject *kobj,
+                                      struct kobj_attribute *attr,
                                       char *buff)
 {
-       struct intel_gt *gt = intel_gt_sysfs_get_drvdata(dev, attr->attr.name);
+       struct intel_gt *gt = intel_gt_sysfs_get_drvdata(kobj, attr->attr.name);
        u32 val;
        int err;
 
@@ -691,11 +647,11 @@ static ssize_t media_RP0_freq_mhz_show(struct device *dev,
        return sysfs_emit(buff, "%u\n", val);
 }
 
-static ssize_t media_RPn_freq_mhz_show(struct device *dev,
-                                      struct device_attribute *attr,
+static ssize_t media_RPn_freq_mhz_show(struct kobject *kobj,
+                                      struct kobj_attribute *attr,
                                       char *buff)
 {
-       struct intel_gt *gt = intel_gt_sysfs_get_drvdata(dev, attr->attr.name);
+       struct intel_gt *gt = intel_gt_sysfs_get_drvdata(kobj, attr->attr.name);
        u32 val;
        int err;
 
@@ -712,17 +668,17 @@ static ssize_t media_RPn_freq_mhz_show(struct device *dev,
        return sysfs_emit(buff, "%u\n", val);
 }
 
-static DEVICE_ATTR_RW(media_freq_factor);
-static struct device_attribute dev_attr_media_freq_factor_scale =
+INTEL_GT_ATTR_RW(media_freq_factor);
+static struct kobj_attribute attr_media_freq_factor_scale =
        __ATTR(media_freq_factor.scale, 0444, freq_factor_scale_show, NULL);
-static DEVICE_ATTR_RO(media_RP0_freq_mhz);
-static DEVICE_ATTR_RO(media_RPn_freq_mhz);
+INTEL_GT_ATTR_RO(media_RP0_freq_mhz);
+INTEL_GT_ATTR_RO(media_RPn_freq_mhz);
 
 static const struct attribute *media_perf_power_attrs[] = {
-       &dev_attr_media_freq_factor.attr,
-       &dev_attr_media_freq_factor_scale.attr,
-       &dev_attr_media_RP0_freq_mhz.attr,
-       &dev_attr_media_RPn_freq_mhz.attr,
+       &attr_media_freq_factor.attr,
+       &attr_media_freq_factor_scale.attr,
+       &attr_media_RP0_freq_mhz.attr,
+       &attr_media_RPn_freq_mhz.attr,
        NULL
 };
 
@@ -754,20 +710,29 @@ static const struct attribute * const rps_defaults_attrs[] = {
        NULL
 };
 
-static int intel_sysfs_rps_init(struct intel_gt *gt, struct kobject *kobj,
-                               const struct attribute * const *attrs)
+static int intel_sysfs_rps_init(struct intel_gt *gt, struct kobject *kobj)
 {
+       const struct attribute * const *attrs;
+       struct attribute *vlv_attr;
        int ret;
 
        if (GRAPHICS_VER(gt->i915) < 6)
                return 0;
 
+       if (is_object_gt(kobj)) {
+               attrs = gen6_rps_attrs;
+               vlv_attr = &attr_rps_vlv_rpe_freq_mhz.attr;
+       } else {
+               attrs = gen6_gt_attrs;
+               vlv_attr = &dev_attr_gt_vlv_rpe_freq_mhz.attr;
+       }
+
        ret = sysfs_create_files(kobj, attrs);
        if (ret)
                return ret;
 
        if (IS_VALLEYVIEW(gt->i915) || IS_CHERRYVIEW(gt->i915))
-               ret = sysfs_create_file(kobj, &dev_attr_vlv_rpe_freq_mhz.attr);
+               ret = sysfs_create_file(kobj, vlv_attr);
 
        return ret;
 }
@@ -778,9 +743,7 @@ void intel_gt_sysfs_pm_init(struct intel_gt *gt, struct kobject *kobj)
 
        intel_sysfs_rc6_init(gt, kobj);
 
-       ret = is_object_gt(kobj) ?
-             intel_sysfs_rps_init(gt, kobj, gen6_rps_attrs) :
-             intel_sysfs_rps_init(gt, kobj, gen6_gt_attrs);
+       ret = intel_sysfs_rps_init(gt, kobj);
        if (ret)
                drm_warn(&gt->i915->drm,
                         "failed to create gt%u RPS sysfs files (%pe)",
@@ -790,13 +753,13 @@ void intel_gt_sysfs_pm_init(struct intel_gt *gt, struct kobject *kobj)
        if (!is_object_gt(kobj))
                return;
 
-       ret = sysfs_create_file(kobj, &dev_attr_punit_req_freq_mhz.attr);
+       ret = sysfs_create_file(kobj, &attr_punit_req_freq_mhz.attr);
        if (ret)
                drm_warn(&gt->i915->drm,
                         "failed to create gt%u punit_req_freq_mhz sysfs (%pe)",
                         gt->info.id, ERR_PTR(ret));
 
-       if (GRAPHICS_VER(gt->i915) >= 11) {
+       if (i915_mmio_reg_valid(intel_gt_perf_limit_reasons_reg(gt))) {
                ret = sysfs_create_files(kobj, throttle_reason_attrs);
                if (ret)
                        drm_warn(&gt->i915->drm,
index f19c2de77ff667ecd99cd242ef78d3f18a97c8ba..a0cc73b401eff9333376365695483edbd9588581 100644 (file)
@@ -20,6 +20,7 @@
 #include "intel_gsc.h"
 
 #include "i915_vma.h"
+#include "i915_perf_types.h"
 #include "intel_engine_types.h"
 #include "intel_gt_buffer_pool_types.h"
 #include "intel_hwconfig.h"
@@ -59,6 +60,9 @@ enum intel_steering_type {
        L3BANK,
        MSLICE,
        LNCF,
+       GAM,
+       DSS,
+       OADDRM,
 
        /*
         * On some platforms there are multiple types of MCR registers that
@@ -141,20 +145,6 @@ struct intel_gt {
        struct intel_wakeref wakeref;
        atomic_t user_wakeref;
 
-       /**
-        *  Protects access to lmem usefault list.
-        *  It is required, if we are outside of the runtime suspend path,
-        *  access to @lmem_userfault_list requires always first grabbing the
-        *  runtime pm, to ensure we can't race against runtime suspend.
-        *  Once we have that we also need to grab @lmem_userfault_lock,
-        *  at which point we have exclusive access.
-        *  The runtime suspend path is special since it doesn't really hold any locks,
-        *  but instead has exclusive access by virtue of all other accesses requiring
-        *  holding the runtime pm wakeref.
-        */
-       struct mutex lmem_userfault_lock;
-       struct list_head lmem_userfault_list;
-
        struct list_head closed_vma;
        spinlock_t closed_lock; /* guards the list of closed_vma */
 
@@ -170,9 +160,6 @@ struct intel_gt {
         */
        intel_wakeref_t awake;
 
-       /* Manual runtime pm autosuspend delay for user GGTT/lmem mmaps */
-       struct intel_wakeref_auto userfault_wakeref;
-
        u32 clock_frequency;
        u32 clock_period_ns;
 
@@ -286,6 +273,8 @@ struct intel_gt {
        /* sysfs defaults per gt */
        struct gt_defaults defaults;
        struct kobject *sysfs_defaults;
+
+       struct i915_perf_gt perf;
 };
 
 struct intel_gt_definition {
index 2eaeba14319e997c34fd8a58c8cd03786d729638..e82a9d763e578fcc84f36841ca78d1e055163964 100644 (file)
@@ -15,6 +15,7 @@
 #include "i915_trace.h"
 #include "i915_utils.h"
 #include "intel_gt.h"
+#include "intel_gt_mcr.h"
 #include "intel_gt_regs.h"
 #include "intel_gtt.h"
 
@@ -269,11 +270,7 @@ void i915_address_space_init(struct i915_address_space *vm, int subclass)
        memset64(vm->min_alignment, I915_GTT_MIN_ALIGNMENT,
                 ARRAY_SIZE(vm->min_alignment));
 
-       if (HAS_64K_PAGES(vm->i915) && NEEDS_COMPACT_PT(vm->i915) &&
-           subclass == VM_CLASS_PPGTT) {
-               vm->min_alignment[INTEL_MEMORY_LOCAL] = I915_GTT_PAGE_SIZE_2M;
-               vm->min_alignment[INTEL_MEMORY_STOLEN_LOCAL] = I915_GTT_PAGE_SIZE_2M;
-       } else if (HAS_64K_PAGES(vm->i915)) {
+       if (HAS_64K_PAGES(vm->i915)) {
                vm->min_alignment[INTEL_MEMORY_LOCAL] = I915_GTT_PAGE_SIZE_64K;
                vm->min_alignment[INTEL_MEMORY_STOLEN_LOCAL] = I915_GTT_PAGE_SIZE_64K;
        }
@@ -343,7 +340,8 @@ int setup_scratch_page(struct i915_address_space *vm)
         */
        size = I915_GTT_PAGE_SIZE_4K;
        if (i915_vm_is_4lvl(vm) &&
-           HAS_PAGE_SIZES(vm->i915, I915_GTT_PAGE_SIZE_64K))
+           HAS_PAGE_SIZES(vm->i915, I915_GTT_PAGE_SIZE_64K) &&
+           !HAS_64K_PAGES(vm->i915))
                size = I915_GTT_PAGE_SIZE_64K;
 
        do {
@@ -385,18 +383,6 @@ skip:
                if (size == I915_GTT_PAGE_SIZE_4K)
                        return -ENOMEM;
 
-               /*
-                * If we need 64K minimum GTT pages for device local-memory,
-                * like on XEHPSDV, then we need to fail the allocation here,
-                * otherwise we can't safely support the insertion of
-                * local-memory pages for this vm, since the HW expects the
-                * correct physical alignment and size when the page-table is
-                * operating in 64K GTT mode, which includes any scratch PTEs,
-                * since userspace can still touch them.
-                */
-               if (HAS_64K_PAGES(vm->i915))
-                       return -ENOMEM;
-
                size = I915_GTT_PAGE_SIZE_4K;
        } while (1);
 }
@@ -493,6 +479,18 @@ static void tgl_setup_private_ppat(struct intel_uncore *uncore)
        intel_uncore_write(uncore, GEN12_PAT_INDEX(7), GEN8_PPAT_WB);
 }
 
+static void xehp_setup_private_ppat(struct intel_gt *gt)
+{
+       intel_gt_mcr_multicast_write(gt, XEHP_PAT_INDEX(0), GEN8_PPAT_WB);
+       intel_gt_mcr_multicast_write(gt, XEHP_PAT_INDEX(1), GEN8_PPAT_WC);
+       intel_gt_mcr_multicast_write(gt, XEHP_PAT_INDEX(2), GEN8_PPAT_WT);
+       intel_gt_mcr_multicast_write(gt, XEHP_PAT_INDEX(3), GEN8_PPAT_UC);
+       intel_gt_mcr_multicast_write(gt, XEHP_PAT_INDEX(4), GEN8_PPAT_WB);
+       intel_gt_mcr_multicast_write(gt, XEHP_PAT_INDEX(5), GEN8_PPAT_WB);
+       intel_gt_mcr_multicast_write(gt, XEHP_PAT_INDEX(6), GEN8_PPAT_WB);
+       intel_gt_mcr_multicast_write(gt, XEHP_PAT_INDEX(7), GEN8_PPAT_WB);
+}
+
 static void icl_setup_private_ppat(struct intel_uncore *uncore)
 {
        intel_uncore_write(uncore,
@@ -585,13 +583,16 @@ static void chv_setup_private_ppat(struct intel_uncore *uncore)
        intel_uncore_write(uncore, GEN8_PRIVATE_PAT_HI, upper_32_bits(pat));
 }
 
-void setup_private_pat(struct intel_uncore *uncore)
+void setup_private_pat(struct intel_gt *gt)
 {
-       struct drm_i915_private *i915 = uncore->i915;
+       struct intel_uncore *uncore = gt->uncore;
+       struct drm_i915_private *i915 = gt->i915;
 
        GEM_BUG_ON(GRAPHICS_VER(i915) < 8);
 
-       if (GRAPHICS_VER(i915) >= 12)
+       if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50))
+               xehp_setup_private_ppat(gt);
+       else if (GRAPHICS_VER(i915) >= 12)
                tgl_setup_private_ppat(uncore);
        else if (GRAPHICS_VER(i915) >= 11)
                icl_setup_private_ppat(uncore);
index c0ca53cba9f08f8c0d11cfd126aa822269618508..4d75ba4bb41dae5774f8986c1708411f9119dbf8 100644 (file)
@@ -93,6 +93,7 @@ typedef u64 gen8_pte_t;
 #define GEN12_GGTT_PTE_LM      BIT_ULL(1)
 
 #define GEN12_PDE_64K BIT(6)
+#define GEN12_PTE_PS64 BIT(8)
 
 /*
  * Cacheability Control is a 4-bit value. The low three bits are stored in bits
@@ -667,7 +668,7 @@ void ppgtt_unbind_vma(struct i915_address_space *vm,
 
 void gtt_write_workarounds(struct intel_gt *gt);
 
-void setup_private_pat(struct intel_uncore *uncore);
+void setup_private_pat(struct intel_gt *gt);
 
 int i915_vm_alloc_pt_stash(struct i915_address_space *vm,
                           struct i915_vm_pt_stash *stash,
index 3955292483a6f13b2ce547807c4554208b72c8c1..7771a19008c60459c89ea613caf15b255d0fc37f 100644 (file)
 #include "intel_ring.h"
 #include "shmem_utils.h"
 
+/*
+ * The per-platform tables are u8-encoded in @data. Decode @data and set the
+ * addresses' offset and commands in @regs. The following encoding is used
+ * for each byte. There are 2 steps: decoding commands and decoding addresses.
+ *
+ * Commands:
+ * [7]: create NOPs - the number of NOPs is set in the lower bits
+ * [6]: When creating an MI_LOAD_REGISTER_IMM command, allow
+ *      MI_LRI_FORCE_POSTED to be set
+ * [5:0]: Number of NOPs or registers to set values to in case of
+ *        MI_LOAD_REGISTER_IMM
+ *
+ * Addresses: these are decoded after an MI_LOAD_REGISTER_IMM command, for
+ * "count" registers. They are set using the REG/REG16 macros: the former
+ * is used for offsets smaller than 0x200 while the latter is for values bigger
+ * than that. Those macros already set all the bits documented below correctly:
+ *
+ * [7]: When a register offset needs more than 6 bits, additional bytes
+ *      follow, carrying the lower bits
+ * [6:0]: Register offset, without considering the engine base.
+ *
+ * This function only tweaks the commands and register offsets. Values are not
+ * filled out.
+ */
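As a rough illustration of the encoding just described — a sketch derived only from this comment, not the driver's actual set_offsets(), which additionally applies the engine MMIO base, offset scaling and trailing padding:

/* Hedged sketch of the decode loop; END assumed to be encoded as 0. */
static u32 *sketch_decode_offsets(u32 *regs, const u8 *data)
{
	while (*data) {
		u8 cmd = *data++;
		u8 count = cmd & 0x3f;

		if (cmd & BIT(7)) {
			/* [7] set: skip "count" dwords, leaving NOP padding */
			regs += count;
			continue;
		}

		/* otherwise an MI_LOAD_REGISTER_IMM of "count" registers */
		*regs++ = MI_LOAD_REGISTER_IMM(count) |
			  (cmd & BIT(6) ? MI_LRI_FORCE_POSTED : 0);

		while (count--) {
			u32 offset = 0;
			u8 v;

			do {	/* [7] set means another offset byte follows */
				v = *data++;
				offset = (offset << 7) | (v & 0x7f);
			} while (v & BIT(7));

			*regs++ = offset;	/* register offset, no engine base */
			regs++;			/* value slot, filled out elsewhere */
		}
	}

	return regs;
}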
 static void set_offsets(u32 *regs,
                        const u8 *data,
                        const struct intel_engine_cs *engine,
@@ -264,6 +288,39 @@ static const u8 dg2_xcs_offsets[] = {
        END
 };
 
+static const u8 mtl_xcs_offsets[] = {
+       NOP(1),
+       LRI(13, POSTED),
+       REG16(0x244),
+       REG(0x034),
+       REG(0x030),
+       REG(0x038),
+       REG(0x03c),
+       REG(0x168),
+       REG(0x140),
+       REG(0x110),
+       REG(0x1c0),
+       REG(0x1c4),
+       REG(0x1c8),
+       REG(0x180),
+       REG16(0x2b4),
+       NOP(4),
+
+       NOP(1),
+       LRI(9, POSTED),
+       REG16(0x3a8),
+       REG16(0x28c),
+       REG16(0x288),
+       REG16(0x284),
+       REG16(0x280),
+       REG16(0x27c),
+       REG16(0x278),
+       REG16(0x274),
+       REG16(0x270),
+
+       END
+};
+
 static const u8 gen8_rcs_offsets[] = {
        NOP(1),
        LRI(14, POSTED),
@@ -606,6 +663,49 @@ static const u8 dg2_rcs_offsets[] = {
        END
 };
 
+static const u8 mtl_rcs_offsets[] = {
+       NOP(1),
+       LRI(15, POSTED),
+       REG16(0x244),
+       REG(0x034),
+       REG(0x030),
+       REG(0x038),
+       REG(0x03c),
+       REG(0x168),
+       REG(0x140),
+       REG(0x110),
+       REG(0x1c0),
+       REG(0x1c4),
+       REG(0x1c8),
+       REG(0x180),
+       REG16(0x2b4),
+       REG(0x120),
+       REG(0x124),
+
+       NOP(1),
+       LRI(9, POSTED),
+       REG16(0x3a8),
+       REG16(0x28c),
+       REG16(0x288),
+       REG16(0x284),
+       REG16(0x280),
+       REG16(0x27c),
+       REG16(0x278),
+       REG16(0x274),
+       REG16(0x270),
+
+       NOP(2),
+       LRI(2, POSTED),
+       REG16(0x5a8),
+       REG16(0x5ac),
+
+       NOP(6),
+       LRI(1, 0),
+       REG(0x0c8),
+
+       END
+};
+
 #undef END
 #undef REG16
 #undef REG
@@ -624,7 +724,9 @@ static const u8 *reg_offsets(const struct intel_engine_cs *engine)
                   !intel_engine_has_relative_mmio(engine));
 
        if (engine->flags & I915_ENGINE_HAS_RCS_REG_STATE) {
-               if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 55))
+               if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 70))
+                       return mtl_rcs_offsets;
+               else if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 55))
                        return dg2_rcs_offsets;
                else if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 50))
                        return xehp_rcs_offsets;
@@ -637,7 +739,9 @@ static const u8 *reg_offsets(const struct intel_engine_cs *engine)
                else
                        return gen8_rcs_offsets;
        } else {
-               if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 55))
+               if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 70))
+                       return mtl_xcs_offsets;
+               else if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 55))
                        return dg2_xcs_offsets;
                else if (GRAPHICS_VER(engine->i915) >= 12)
                        return gen12_xcs_offsets;
@@ -745,19 +849,18 @@ static int lrc_ring_cmd_buf_cctl(const struct intel_engine_cs *engine)
 static u32
 lrc_ring_indirect_offset_default(const struct intel_engine_cs *engine)
 {
-       switch (GRAPHICS_VER(engine->i915)) {
-       default:
-               MISSING_CASE(GRAPHICS_VER(engine->i915));
-               fallthrough;
-       case 12:
+       if (GRAPHICS_VER(engine->i915) >= 12)
                return GEN12_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
-       case 11:
+       else if (GRAPHICS_VER(engine->i915) >= 11)
                return GEN11_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
-       case 9:
+       else if (GRAPHICS_VER(engine->i915) >= 9)
                return GEN9_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
-       case 8:
+       else if (GRAPHICS_VER(engine->i915) >= 8)
                return GEN8_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
-       }
+
+       GEM_BUG_ON(GRAPHICS_VER(engine->i915) < 8);
+
+       return 0;
 }
 
 static void
@@ -1012,7 +1115,7 @@ __lrc_alloc_state(struct intel_context *ce, struct intel_engine_cs *engine)
        if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
                context_size += I915_GTT_PAGE_SIZE; /* for redzone */
 
-       if (GRAPHICS_VER(engine->i915) == 12) {
+       if (GRAPHICS_VER(engine->i915) >= 12) {
                ce->wa_bb_page = context_size / PAGE_SIZE;
                context_size += PAGE_SIZE;
        }
@@ -1718,24 +1821,16 @@ void lrc_init_wa_ctx(struct intel_engine_cs *engine)
        unsigned int i;
        int err;
 
-       if (!(engine->flags & I915_ENGINE_HAS_RCS_REG_STATE))
+       if (GRAPHICS_VER(engine->i915) >= 11 ||
+           !(engine->flags & I915_ENGINE_HAS_RCS_REG_STATE))
                return;
 
-       switch (GRAPHICS_VER(engine->i915)) {
-       case 12:
-       case 11:
-               return;
-       case 9:
+       if (GRAPHICS_VER(engine->i915) == 9) {
                wa_bb_fn[0] = gen9_init_indirectctx_bb;
                wa_bb_fn[1] = NULL;
-               break;
-       case 8:
+       } else if (GRAPHICS_VER(engine->i915) == 8) {
                wa_bb_fn[0] = gen8_init_indirectctx_bb;
                wa_bb_fn[1] = NULL;
-               break;
-       default:
-               MISSING_CASE(GRAPHICS_VER(engine->i915));
-               return;
        }
 
        err = lrc_create_wa_ctx(engine);
index a390f0813c8b64e5b0ee08a48a9ca7e875ad9e94..7111bae759f3f9e1a592d3d350c279cb0ddf0e5d 100644 (file)
@@ -110,6 +110,8 @@ enum {
 #define XEHP_SW_CTX_ID_WIDTH                   16
 #define XEHP_SW_COUNTER_SHIFT                  58
 #define XEHP_SW_COUNTER_WIDTH                  6
+#define GEN12_GUC_SW_CTX_ID_SHIFT              39
+#define GEN12_GUC_SW_CTX_ID_WIDTH              16
 
 static inline void lrc_runtime_start(struct intel_context *ce)
 {
index aaaf1906026c1351b15cb4f74608fee41478f9b9..b405a04135ca2178692284b8f743d9d6cb0246bd 100644 (file)
@@ -10,6 +10,7 @@
 #include "intel_gtt.h"
 #include "intel_migrate.h"
 #include "intel_ring.h"
+#include "gem/i915_gem_lmem.h"
 
 struct insert_pte_data {
        u64 offset;
index 152244d7f62a03cd2810a1995629ebae90ec89be..49fdd509527ad6ddec399512fadb6be5a20c3011 100644 (file)
@@ -7,6 +7,7 @@
 
 #include "intel_engine.h"
 #include "intel_gt.h"
+#include "intel_gt_mcr.h"
 #include "intel_gt_regs.h"
 #include "intel_mocs.h"
 #include "intel_ring.h"
@@ -609,14 +610,17 @@ static u32 l3cc_combine(u16 low, u16 high)
             0; \
             i++)
 
-static void init_l3cc_table(struct intel_uncore *uncore,
+static void init_l3cc_table(struct intel_gt *gt,
                            const struct drm_i915_mocs_table *table)
 {
        unsigned int i;
        u32 l3cc;
 
        for_each_l3cc(l3cc, table, i)
-               intel_uncore_write_fw(uncore, GEN9_LNCFCMOCS(i), l3cc);
+               if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 50))
+                       intel_gt_mcr_multicast_write_fw(gt, XEHP_LNCFCMOCS(i), l3cc);
+               else
+                       intel_uncore_write_fw(gt->uncore, GEN9_LNCFCMOCS(i), l3cc);
 }
 
 void intel_mocs_init_engine(struct intel_engine_cs *engine)
@@ -636,7 +640,7 @@ void intel_mocs_init_engine(struct intel_engine_cs *engine)
                init_mocs_table(engine, &table);
 
        if (flags & HAS_RENDER_L3CC && engine->class == RENDER_CLASS)
-               init_l3cc_table(engine->uncore, &table);
+               init_l3cc_table(engine->gt, &table);
 }
 
 static u32 global_mocs_offset(void)
@@ -672,7 +676,7 @@ void intel_mocs_init(struct intel_gt *gt)
         * memory transactions including guc transactions
         */
        if (flags & HAS_RENDER_L3CC)
-               init_l3cc_table(gt->uncore, &table);
+               init_l3cc_table(gt, &table);
 }
 
 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
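
init_l3cc_table() now takes the GT rather than the uncore so that on Xe_HP and later (GRAPHICS_VER_FULL >= IP_VER(12, 50)) it can use a multicast MCR write, keeping every register replica in sync, while older parts keep the plain MMIO write. Each LNCFCMOCS register still carries two 16-bit L3CC table entries, which is what l3cc_combine() packs. A small user-space sketch of that packing; the table values are made up for illustration.

#include <stdint.h>
#include <stdio.h>

/* Two 16-bit L3CC entries share each 32-bit LNCFCMOCS register; this mirrors
 * what l3cc_combine() does in the hunk above. */
static uint32_t l3cc_combine(uint16_t low, uint16_t high)
{
	return (uint32_t)low | ((uint32_t)high << 16);
}

int main(void)
{
	/* Made-up table entries purely for illustration. */
	const uint16_t table[] = { 0x0010, 0x0030, 0x0010, 0x0091 };
	unsigned int i;

	for (i = 0; i < sizeof(table) / sizeof(table[0]); i += 2)
		printf("LNCFCMOCS(%u) = %#010x\n", i / 2,
		       l3cc_combine(table[i], table[i + 1]));
	return 0;
}
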
index b366743569862d89dfc767a8afc873b6caa60797..3159df6cdd492f7b651be0acfa669e2033fa037a 100644 (file)
@@ -1278,7 +1278,7 @@ static void intel_gt_reset_global(struct intel_gt *gt,
        kobject_uevent_env(kobj, KOBJ_CHANGE, reset_event);
 
        /* Use a watchdog to ensure that our reset completes */
-       intel_wedge_on_timeout(&w, gt, 5 * HZ) {
+       intel_wedge_on_timeout(&w, gt, 60 * HZ) {
                intel_display_prepare_reset(gt->i915);
 
                intel_gt_reset(gt, engine_mask, reason);
index 6b86250c31ab56c44dcbd7cee0e2599defe300a8..6c34a83c24b3455af6c6683ce00e3213726699c5 100644 (file)
@@ -625,9 +625,7 @@ static void gen5_rps_disable(struct intel_rps *rps)
        rgvswctl = intel_uncore_read16(uncore, MEMSWCTL);
 
        /* Ack interrupts, disable EFC interrupt */
-       intel_uncore_write(uncore, MEMINTREN,
-                          intel_uncore_read(uncore, MEMINTREN) &
-                          ~MEMINT_EVAL_CHG_EN);
+       intel_uncore_rmw(uncore, MEMINTREN, MEMINT_EVAL_CHG_EN, 0);
        intel_uncore_write(uncore, MEMINTRSTS, MEMINT_EVAL_CHG);
 
        /* Go back to the starting frequency */
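
The gen5_rps_disable() hunk folds an open-coded read/modify/write of MEMINTREN into intel_uncore_rmw(uncore, reg, clear, set). A stand-alone sketch of why the two are equivalent, using a fake register and an illustrative bit value (the real MEMINT_EVAL_CHG_EN definition is not reproduced here):

#include <stdint.h>
#include <stdio.h>

static uint32_t fake_reg = 0x0000001f;          /* stand-in for MEMINTREN */

static uint32_t reg_read(void)        { return fake_reg; }
static void     reg_write(uint32_t v) { fake_reg = v; }

/* rmw(clear, set): new = (old & ~clear) | set -- the same result the old
 * explicit read/and/write sequence produced. */
static uint32_t reg_rmw(uint32_t clear, uint32_t set)
{
	uint32_t old = reg_read();

	reg_write((old & ~clear) | set);
	return old;
}

int main(void)
{
	const uint32_t EVAL_CHG_EN = 1u << 4;    /* illustrative bit position */

	reg_rmw(EVAL_CHG_EN, 0);                 /* "disable EFC interrupt" */
	printf("register after rmw: %#010x\n", fake_reg);
	return 0;
}
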
@@ -1016,9 +1014,15 @@ void intel_rps_boost(struct i915_request *rq)
                if (rps_uses_slpc(rps)) {
                        slpc = rps_to_slpc(rps);
 
+                       if (slpc->min_freq_softlimit >= slpc->boost_freq)
+                               return;
+
                        /* Return if old value is non zero */
-                       if (!atomic_fetch_inc(&slpc->num_waiters))
+                       if (!atomic_fetch_inc(&slpc->num_waiters)) {
+                               GT_TRACE(rps_to_gt(rps), "boost fence:%llx:%llx\n",
+                                        rq->fence.context, rq->fence.seqno);
                                schedule_work(&slpc->boost_work);
+                       }
 
                        return;
                }
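
In the SLPC boost path above, atomic_fetch_inc() returns the previous waiter count, so only the 0 -> 1 transition schedules the boost worker, and the new early return skips boosting entirely when the soft-limited minimum frequency already sits at or above the boost frequency. A user-space sketch of that gating with C11 atomics; the function and variable names are hypothetical:

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static atomic_int num_waiters;
static bool work_scheduled;              /* stands in for schedule_work() */

static void request_boost(unsigned int min_softlimit, unsigned int boost_freq)
{
	/* Nothing to boost if the floor already sits at/above the boost point. */
	if (min_softlimit >= boost_freq)
		return;

	/* fetch-and-increment returns the previous count, so only the first
	 * waiter (old value 0) schedules the worker; later callers just add
	 * themselves to the count. */
	if (atomic_fetch_add(&num_waiters, 1) == 0)
		work_scheduled = true;
}

int main(void)
{
	request_boost(300, 1100);
	request_boost(300, 1100);
	printf("waiters=%d scheduled=%d\n",
	       atomic_load(&num_waiters), (int)work_scheduled);
	return 0;
}
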
@@ -1085,15 +1089,25 @@ static u32 intel_rps_read_state_cap(struct intel_rps *rps)
                return intel_uncore_read(uncore, GEN6_RP_STATE_CAP);
 }
 
-/**
- * gen6_rps_get_freq_caps - Get freq caps exposed by HW
- * @rps: the intel_rps structure
- * @caps: returned freq caps
- *
- * Returned "caps" frequencies should be converted to MHz using
- * intel_gpu_freq()
- */
-void gen6_rps_get_freq_caps(struct intel_rps *rps, struct intel_rps_freq_caps *caps)
+static void
+mtl_get_freq_caps(struct intel_rps *rps, struct intel_rps_freq_caps *caps)
+{
+       struct intel_uncore *uncore = rps_to_uncore(rps);
+       u32 rp_state_cap = rps_to_gt(rps)->type == GT_MEDIA ?
+                               intel_uncore_read(uncore, MTL_MEDIAP_STATE_CAP) :
+                               intel_uncore_read(uncore, MTL_RP_STATE_CAP);
+       u32 rpe = rps_to_gt(rps)->type == GT_MEDIA ?
+                       intel_uncore_read(uncore, MTL_MPE_FREQUENCY) :
+                       intel_uncore_read(uncore, MTL_GT_RPE_FREQUENCY);
+
+       /* MTL values are in units of 16.67 MHz */
+       caps->rp0_freq = REG_FIELD_GET(MTL_RP0_CAP_MASK, rp_state_cap);
+       caps->min_freq = REG_FIELD_GET(MTL_RPN_CAP_MASK, rp_state_cap);
+       caps->rp1_freq = REG_FIELD_GET(MTL_RPE_MASK, rpe);
+}
+
+static void
+__gen6_rps_get_freq_caps(struct intel_rps *rps, struct intel_rps_freq_caps *caps)
 {
        struct drm_i915_private *i915 = rps_to_i915(rps);
        u32 rp_state_cap;
@@ -1128,6 +1142,24 @@ void gen6_rps_get_freq_caps(struct intel_rps *rps, struct intel_rps_freq_caps *c
        }
 }
 
+/**
+ * gen6_rps_get_freq_caps - Get freq caps exposed by HW
+ * @rps: the intel_rps structure
+ * @caps: returned freq caps
+ *
+ * Returned "caps" frequencies should be converted to MHz using
+ * intel_gpu_freq()
+ */
+void gen6_rps_get_freq_caps(struct intel_rps *rps, struct intel_rps_freq_caps *caps)
+{
+       struct drm_i915_private *i915 = rps_to_i915(rps);
+
+       if (IS_METEORLAKE(i915))
+               return mtl_get_freq_caps(rps, caps);
+       else
+               return __gen6_rps_get_freq_caps(rps, caps);
+}
+
 static void gen6_rps_init(struct intel_rps *rps)
 {
        struct drm_i915_private *i915 = rps_to_i915(rps);
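
mtl_get_freq_caps() reads RP0 and RPn from a state-capability register and RPe from a separate frequency register, extracting each field REG_FIELD_GET-style, and the resulting ratios are in 16.67 MHz units (i.e. ratio * 100 / 6 MHz). A sketch of the extraction and unit conversion with made-up field positions; the real MTL_*_MASK definitions are not reproduced here:

#include <stdint.h>
#include <stdio.h>

/* Made-up field positions; the real MTL RP0/RPN/RPE masks live in the
 * register headers and are not reproduced here. */
#define RP0_MASK 0x000000ffu
#define RPN_MASK 0x00ff0000u

/* REG_FIELD_GET-style extraction: shift the field down by its mask's LSB. */
static uint32_t field_get(uint32_t mask, uint32_t val)
{
	return (val & mask) / (mask & -mask);
}

/* MTL ratios are in 16.67 MHz units, i.e. ratio * 100 / 6 MHz. */
static unsigned int ratio_to_mhz(uint32_t ratio)
{
	return ratio * 100 / 6;
}

int main(void)
{
	uint32_t rp_state_cap = 0x0012004e;      /* RPn ratio 0x12, RP0 ratio 0x4e */

	printf("RP0: %u MHz\n", ratio_to_mhz(field_get(RP0_MASK, rp_state_cap)));
	printf("RPn: %u MHz\n", ratio_to_mhz(field_get(RPN_MASK, rp_state_cap)));
	return 0;
}
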
@@ -2191,6 +2223,213 @@ u32 intel_rps_get_rpn_frequency(struct intel_rps *rps)
                return intel_gpu_freq(rps, rps->min_freq);
 }
 
+static void rps_frequency_dump(struct intel_rps *rps, struct drm_printer *p)
+{
+       struct intel_gt *gt = rps_to_gt(rps);
+       struct drm_i915_private *i915 = gt->i915;
+       struct intel_uncore *uncore = gt->uncore;
+       struct intel_rps_freq_caps caps;
+       u32 rp_state_limits;
+       u32 gt_perf_status;
+       u32 rpmodectl, rpinclimit, rpdeclimit;
+       u32 rpstat, cagf, reqf;
+       u32 rpcurupei, rpcurup, rpprevup;
+       u32 rpcurdownei, rpcurdown, rpprevdown;
+       u32 rpupei, rpupt, rpdownei, rpdownt;
+       u32 pm_ier, pm_imr, pm_isr, pm_iir, pm_mask;
+
+       rp_state_limits = intel_uncore_read(uncore, GEN6_RP_STATE_LIMITS);
+       gen6_rps_get_freq_caps(rps, &caps);
+       if (IS_GEN9_LP(i915))
+               gt_perf_status = intel_uncore_read(uncore, BXT_GT_PERF_STATUS);
+       else
+               gt_perf_status = intel_uncore_read(uncore, GEN6_GT_PERF_STATUS);
+
+       /* RPSTAT1 is in the GT power well */
+       intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL);
+
+       reqf = intel_uncore_read(uncore, GEN6_RPNSWREQ);
+       if (GRAPHICS_VER(i915) >= 9) {
+               reqf >>= 23;
+       } else {
+               reqf &= ~GEN6_TURBO_DISABLE;
+               if (IS_HASWELL(i915) || IS_BROADWELL(i915))
+                       reqf >>= 24;
+               else
+                       reqf >>= 25;
+       }
+       reqf = intel_gpu_freq(rps, reqf);
+
+       rpmodectl = intel_uncore_read(uncore, GEN6_RP_CONTROL);
+       rpinclimit = intel_uncore_read(uncore, GEN6_RP_UP_THRESHOLD);
+       rpdeclimit = intel_uncore_read(uncore, GEN6_RP_DOWN_THRESHOLD);
+
+       rpstat = intel_uncore_read(uncore, GEN6_RPSTAT1);
+       rpcurupei = intel_uncore_read(uncore, GEN6_RP_CUR_UP_EI) & GEN6_CURICONT_MASK;
+       rpcurup = intel_uncore_read(uncore, GEN6_RP_CUR_UP) & GEN6_CURBSYTAVG_MASK;
+       rpprevup = intel_uncore_read(uncore, GEN6_RP_PREV_UP) & GEN6_CURBSYTAVG_MASK;
+       rpcurdownei = intel_uncore_read(uncore, GEN6_RP_CUR_DOWN_EI) & GEN6_CURIAVG_MASK;
+       rpcurdown = intel_uncore_read(uncore, GEN6_RP_CUR_DOWN) & GEN6_CURBSYTAVG_MASK;
+       rpprevdown = intel_uncore_read(uncore, GEN6_RP_PREV_DOWN) & GEN6_CURBSYTAVG_MASK;
+
+       rpupei = intel_uncore_read(uncore, GEN6_RP_UP_EI);
+       rpupt = intel_uncore_read(uncore, GEN6_RP_UP_THRESHOLD);
+
+       rpdownei = intel_uncore_read(uncore, GEN6_RP_DOWN_EI);
+       rpdownt = intel_uncore_read(uncore, GEN6_RP_DOWN_THRESHOLD);
+
+       cagf = intel_rps_read_actual_frequency(rps);
+
+       intel_uncore_forcewake_put(uncore, FORCEWAKE_ALL);
+
+       if (GRAPHICS_VER(i915) >= 11) {
+               pm_ier = intel_uncore_read(uncore, GEN11_GPM_WGBOXPERF_INTR_ENABLE);
+               pm_imr = intel_uncore_read(uncore, GEN11_GPM_WGBOXPERF_INTR_MASK);
+               /*
+                * The equivalent to the PM ISR & IIR cannot be read
+                * without affecting the current state of the system
+                */
+               pm_isr = 0;
+               pm_iir = 0;
+       } else if (GRAPHICS_VER(i915) >= 8) {
+               pm_ier = intel_uncore_read(uncore, GEN8_GT_IER(2));
+               pm_imr = intel_uncore_read(uncore, GEN8_GT_IMR(2));
+               pm_isr = intel_uncore_read(uncore, GEN8_GT_ISR(2));
+               pm_iir = intel_uncore_read(uncore, GEN8_GT_IIR(2));
+       } else {
+               pm_ier = intel_uncore_read(uncore, GEN6_PMIER);
+               pm_imr = intel_uncore_read(uncore, GEN6_PMIMR);
+               pm_isr = intel_uncore_read(uncore, GEN6_PMISR);
+               pm_iir = intel_uncore_read(uncore, GEN6_PMIIR);
+       }
+       pm_mask = intel_uncore_read(uncore, GEN6_PMINTRMSK);
+
+       drm_printf(p, "Video Turbo Mode: %s\n",
+                  str_yes_no(rpmodectl & GEN6_RP_MEDIA_TURBO));
+       drm_printf(p, "HW control enabled: %s\n",
+                  str_yes_no(rpmodectl & GEN6_RP_ENABLE));
+       drm_printf(p, "SW control enabled: %s\n",
+                  str_yes_no((rpmodectl & GEN6_RP_MEDIA_MODE_MASK) == GEN6_RP_MEDIA_SW_MODE));
+
+       drm_printf(p, "PM IER=0x%08x IMR=0x%08x, MASK=0x%08x\n",
+                  pm_ier, pm_imr, pm_mask);
+       if (GRAPHICS_VER(i915) <= 10)
+               drm_printf(p, "PM ISR=0x%08x IIR=0x%08x\n",
+                          pm_isr, pm_iir);
+       drm_printf(p, "pm_intrmsk_mbz: 0x%08x\n",
+                  rps->pm_intrmsk_mbz);
+       drm_printf(p, "GT_PERF_STATUS: 0x%08x\n", gt_perf_status);
+       drm_printf(p, "Render p-state ratio: %d\n",
+                  (gt_perf_status & (GRAPHICS_VER(i915) >= 9 ? 0x1ff00 : 0xff00)) >> 8);
+       drm_printf(p, "Render p-state VID: %d\n",
+                  gt_perf_status & 0xff);
+       drm_printf(p, "Render p-state limit: %d\n",
+                  rp_state_limits & 0xff);
+       drm_printf(p, "RPSTAT1: 0x%08x\n", rpstat);
+       drm_printf(p, "RPMODECTL: 0x%08x\n", rpmodectl);
+       drm_printf(p, "RPINCLIMIT: 0x%08x\n", rpinclimit);
+       drm_printf(p, "RPDECLIMIT: 0x%08x\n", rpdeclimit);
+       drm_printf(p, "RPNSWREQ: %dMHz\n", reqf);
+       drm_printf(p, "CAGF: %dMHz\n", cagf);
+       drm_printf(p, "RP CUR UP EI: %d (%lldns)\n",
+                  rpcurupei,
+                  intel_gt_pm_interval_to_ns(gt, rpcurupei));
+       drm_printf(p, "RP CUR UP: %d (%lldns)\n",
+                  rpcurup, intel_gt_pm_interval_to_ns(gt, rpcurup));
+       drm_printf(p, "RP PREV UP: %d (%lldns)\n",
+                  rpprevup, intel_gt_pm_interval_to_ns(gt, rpprevup));
+       drm_printf(p, "Up threshold: %d%%\n",
+                  rps->power.up_threshold);
+       drm_printf(p, "RP UP EI: %d (%lldns)\n",
+                  rpupei, intel_gt_pm_interval_to_ns(gt, rpupei));
+       drm_printf(p, "RP UP THRESHOLD: %d (%lldns)\n",
+                  rpupt, intel_gt_pm_interval_to_ns(gt, rpupt));
+
+       drm_printf(p, "RP CUR DOWN EI: %d (%lldns)\n",
+                  rpcurdownei,
+                  intel_gt_pm_interval_to_ns(gt, rpcurdownei));
+       drm_printf(p, "RP CUR DOWN: %d (%lldns)\n",
+                  rpcurdown,
+                  intel_gt_pm_interval_to_ns(gt, rpcurdown));
+       drm_printf(p, "RP PREV DOWN: %d (%lldns)\n",
+                  rpprevdown,
+                  intel_gt_pm_interval_to_ns(gt, rpprevdown));
+       drm_printf(p, "Down threshold: %d%%\n",
+                  rps->power.down_threshold);
+       drm_printf(p, "RP DOWN EI: %d (%lldns)\n",
+                  rpdownei, intel_gt_pm_interval_to_ns(gt, rpdownei));
+       drm_printf(p, "RP DOWN THRESHOLD: %d (%lldns)\n",
+                  rpdownt, intel_gt_pm_interval_to_ns(gt, rpdownt));
+
+       drm_printf(p, "Lowest (RPN) frequency: %dMHz\n",
+                  intel_gpu_freq(rps, caps.min_freq));
+       drm_printf(p, "Nominal (RP1) frequency: %dMHz\n",
+                  intel_gpu_freq(rps, caps.rp1_freq));
+       drm_printf(p, "Max non-overclocked (RP0) frequency: %dMHz\n",
+                  intel_gpu_freq(rps, caps.rp0_freq));
+       drm_printf(p, "Max overclocked frequency: %dMHz\n",
+                  intel_gpu_freq(rps, rps->max_freq));
+
+       drm_printf(p, "Current freq: %d MHz\n",
+                  intel_gpu_freq(rps, rps->cur_freq));
+       drm_printf(p, "Actual freq: %d MHz\n", cagf);
+       drm_printf(p, "Idle freq: %d MHz\n",
+                  intel_gpu_freq(rps, rps->idle_freq));
+       drm_printf(p, "Min freq: %d MHz\n",
+                  intel_gpu_freq(rps, rps->min_freq));
+       drm_printf(p, "Boost freq: %d MHz\n",
+                  intel_gpu_freq(rps, rps->boost_freq));
+       drm_printf(p, "Max freq: %d MHz\n",
+                  intel_gpu_freq(rps, rps->max_freq));
+       drm_printf(p,
+                  "efficient (RPe) frequency: %d MHz\n",
+                  intel_gpu_freq(rps, rps->efficient_freq));
+}
+
+static void slpc_frequency_dump(struct intel_rps *rps, struct drm_printer *p)
+{
+       struct intel_gt *gt = rps_to_gt(rps);
+       struct intel_uncore *uncore = gt->uncore;
+       struct intel_rps_freq_caps caps;
+       u32 pm_mask;
+
+       gen6_rps_get_freq_caps(rps, &caps);
+       pm_mask = intel_uncore_read(uncore, GEN6_PMINTRMSK);
+
+       drm_printf(p, "PM MASK=0x%08x\n", pm_mask);
+       drm_printf(p, "pm_intrmsk_mbz: 0x%08x\n",
+                  rps->pm_intrmsk_mbz);
+       drm_printf(p, "RPSTAT1: 0x%08x\n", intel_uncore_read(uncore, GEN6_RPSTAT1));
+       drm_printf(p, "RPNSWREQ: %dMHz\n", intel_rps_get_requested_frequency(rps));
+       drm_printf(p, "Lowest (RPN) frequency: %dMHz\n",
+                  intel_gpu_freq(rps, caps.min_freq));
+       drm_printf(p, "Nominal (RP1) frequency: %dMHz\n",
+                  intel_gpu_freq(rps, caps.rp1_freq));
+       drm_printf(p, "Max non-overclocked (RP0) frequency: %dMHz\n",
+                  intel_gpu_freq(rps, caps.rp0_freq));
+       drm_printf(p, "Current freq: %d MHz\n",
+                  intel_rps_get_requested_frequency(rps));
+       drm_printf(p, "Actual freq: %d MHz\n",
+                  intel_rps_read_actual_frequency(rps));
+       drm_printf(p, "Min freq: %d MHz\n",
+                  intel_rps_get_min_frequency(rps));
+       drm_printf(p, "Boost freq: %d MHz\n",
+                  intel_rps_get_boost_frequency(rps));
+       drm_printf(p, "Max freq: %d MHz\n",
+                  intel_rps_get_max_frequency(rps));
+       drm_printf(p,
+                  "efficient (RPe) frequency: %d MHz\n",
+                  intel_gpu_freq(rps, caps.rp1_freq));
+}
+
+void gen6_rps_frequency_dump(struct intel_rps *rps, struct drm_printer *p)
+{
+       if (rps_uses_slpc(rps))
+               return slpc_frequency_dump(rps, p);
+       else
+               return rps_frequency_dump(rps, p);
+}
+
 static int set_max_freq(struct intel_rps *rps, u32 val)
 {
        struct drm_i915_private *i915 = rps_to_i915(rps);
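
Within rps_frequency_dump() above, the requested ratio in GEN6_RPNSWREQ moves between bit 23 (gen9+), bit 24 (Haswell/Broadwell) and bit 25 (earlier platforms), and pre-gen9 parts also carry a turbo-disable flag that must be masked out first. A compact sketch of that decode; the flag's bit position here is illustrative only:

#include <stdint.h>
#include <stdio.h>

#define TURBO_DISABLE (1u << 31)   /* illustrative stand-in for GEN6_TURBO_DISABLE */

/* Mirrors the per-generation decode of GEN6_RPNSWREQ in rps_frequency_dump():
 * the requested ratio moves between bits 23, 24 and 25 depending on platform. */
static uint32_t decode_req_ratio(uint32_t rpnswreq, int ver, int is_hsw_bdw)
{
	if (ver >= 9)
		return rpnswreq >> 23;

	rpnswreq &= ~TURBO_DISABLE;
	return is_hsw_bdw ? rpnswreq >> 24 : rpnswreq >> 25;
}

int main(void)
{
	printf("gen9 ratio: %#x\n", decode_req_ratio(0x32u << 23, 9, 0));
	printf("hsw ratio:  %#x\n", decode_req_ratio(0x19u << 24, 8, 1));
	return 0;
}
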
index 4509dfdc52e09d90775fec8054c8ffcae6207cba..110300dfd4383e85f8a3ab78b0341a528d74b211 100644 (file)
@@ -10,6 +10,7 @@
 #include "i915_reg_defs.h"
 
 struct i915_request;
+struct drm_printer;
 
 void intel_rps_init_early(struct intel_rps *rps);
 void intel_rps_init(struct intel_rps *rps);
@@ -54,6 +55,8 @@ void intel_rps_lower_unslice(struct intel_rps *rps);
 u32 intel_rps_read_throttle_reason(struct intel_rps *rps);
 bool rps_read_mask_mmio(struct intel_rps *rps, i915_reg_t reg32, u32 mask);
 
+void gen6_rps_frequency_dump(struct intel_rps *rps, struct drm_printer *p);
+
 void gen5_rps_irq_handler(struct intel_rps *rps);
 void gen6_rps_irq_handler(struct intel_rps *rps, u32 pm_iir);
 void gen11_rps_irq_handler(struct intel_rps *rps, u32 pm_iir);
index 66f21c735d548a5301773cdb12cda9dae319bde2..6c6198a257ac67305c3d5c318bf9f2ef1a41fcb9 100644 (file)
@@ -677,8 +677,8 @@ u32 intel_sseu_make_rpcs(struct intel_gt *gt,
         * If i915/perf is active, we want a stable powergating configuration
         * on the system. Use the configuration pinned by i915/perf.
         */
-       if (i915->perf.exclusive_stream)
-               req_sseu = &i915->perf.sseu;
+       if (gt->perf.exclusive_stream)
+               req_sseu = &gt->perf.sseu;
 
        slices = hweight8(req_sseu->slice_mask);
        subslices = hweight8(req_sseu->subslice_mask);
index a821e3d405dbef0e9af9eed175db8b26e96e5764..3cdf5c24dbc50728396ab0baadb02055d10d91cd 100644 (file)
@@ -166,12 +166,33 @@ static void wa_add(struct i915_wa_list *wal, i915_reg_t reg,
        _wa_add(wal, &wa);
 }
 
+static void wa_mcr_add(struct i915_wa_list *wal, i915_mcr_reg_t reg,
+                      u32 clear, u32 set, u32 read_mask, bool masked_reg)
+{
+       struct i915_wa wa = {
+               .mcr_reg = reg,
+               .clr  = clear,
+               .set  = set,
+               .read = read_mask,
+               .masked_reg = masked_reg,
+               .is_mcr = 1,
+       };
+
+       _wa_add(wal, &wa);
+}
+
 static void
 wa_write_clr_set(struct i915_wa_list *wal, i915_reg_t reg, u32 clear, u32 set)
 {
        wa_add(wal, reg, clear, set, clear, false);
 }
 
+static void
+wa_mcr_write_clr_set(struct i915_wa_list *wal, i915_mcr_reg_t reg, u32 clear, u32 set)
+{
+       wa_mcr_add(wal, reg, clear, set, clear, false);
+}
+
 static void
 wa_write(struct i915_wa_list *wal, i915_reg_t reg, u32 set)
 {
@@ -184,12 +205,24 @@ wa_write_or(struct i915_wa_list *wal, i915_reg_t reg, u32 set)
        wa_write_clr_set(wal, reg, set, set);
 }
 
+static void
+wa_mcr_write_or(struct i915_wa_list *wal, i915_mcr_reg_t reg, u32 set)
+{
+       wa_mcr_write_clr_set(wal, reg, set, set);
+}
+
 static void
 wa_write_clr(struct i915_wa_list *wal, i915_reg_t reg, u32 clr)
 {
        wa_write_clr_set(wal, reg, clr, 0);
 }
 
+static void
+wa_mcr_write_clr(struct i915_wa_list *wal, i915_mcr_reg_t reg, u32 clr)
+{
+       wa_mcr_write_clr_set(wal, reg, clr, 0);
+}
+
 /*
  * WA operations on "masked register". A masked register has the upper 16 bits
  * documented as "masked" in b-spec. Its purpose is to allow writing to just a
@@ -207,12 +240,24 @@ wa_masked_en(struct i915_wa_list *wal, i915_reg_t reg, u32 val)
        wa_add(wal, reg, 0, _MASKED_BIT_ENABLE(val), val, true);
 }
 
+static void
+wa_mcr_masked_en(struct i915_wa_list *wal, i915_mcr_reg_t reg, u32 val)
+{
+       wa_mcr_add(wal, reg, 0, _MASKED_BIT_ENABLE(val), val, true);
+}
+
 static void
 wa_masked_dis(struct i915_wa_list *wal, i915_reg_t reg, u32 val)
 {
        wa_add(wal, reg, 0, _MASKED_BIT_DISABLE(val), val, true);
 }
 
+static void
+wa_mcr_masked_dis(struct i915_wa_list *wal, i915_mcr_reg_t reg, u32 val)
+{
+       wa_mcr_add(wal, reg, 0, _MASKED_BIT_DISABLE(val), val, true);
+}
+
 static void
 wa_masked_field_set(struct i915_wa_list *wal, i915_reg_t reg,
                    u32 mask, u32 val)
@@ -220,6 +265,13 @@ wa_masked_field_set(struct i915_wa_list *wal, i915_reg_t reg,
        wa_add(wal, reg, 0, _MASKED_FIELD(mask, val), mask, true);
 }
 
+static void
+wa_mcr_masked_field_set(struct i915_wa_list *wal, i915_mcr_reg_t reg,
+                       u32 mask, u32 val)
+{
+       wa_mcr_add(wal, reg, 0, _MASKED_FIELD(mask, val), mask, true);
+}
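
All of the wa_masked_* and new wa_mcr_masked_* helpers target "masked" registers: the upper 16 bits of the written value select which of the lower 16 bits actually change, so _MASKED_BIT_ENABLE(b) expands to roughly (b << 16) | b and _MASKED_FIELD(mask, val) to (mask << 16) | val. A stand-alone sketch of the encoding and of the update a masked write effectively performs:

#include <stdint.h>
#include <stdio.h>

/* Masked-register encodings: the high half names the bits being written,
 * the low half carries their new values. */
#define MASKED_FIELD(mask, val) (((uint32_t)(mask) << 16) | (val))
#define MASKED_BIT_ENABLE(b)    MASKED_FIELD((b), (b))
#define MASKED_BIT_DISABLE(b)   MASKED_FIELD((b), 0)

/* What a masked write effectively does to the low 16 bits of the register. */
static uint16_t apply_masked_write(uint16_t current, uint32_t masked_val)
{
	uint16_t mask = masked_val >> 16;
	uint16_t val  = masked_val & 0xffff;

	return (current & ~mask) | (val & mask);
}

int main(void)
{
	uint16_t reg = 0x0003;

	reg = apply_masked_write(reg, MASKED_BIT_ENABLE(1u << 5));
	reg = apply_masked_write(reg, MASKED_BIT_DISABLE(1u << 1));
	printf("register now: %#06x\n", reg);   /* bit 5 set, bit 1 cleared */
	return 0;
}
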
+
 static void gen6_ctx_workarounds_init(struct intel_engine_cs *engine,
                                      struct i915_wa_list *wal)
 {
@@ -241,8 +293,8 @@ static void gen8_ctx_workarounds_init(struct intel_engine_cs *engine,
        wa_masked_en(wal, RING_MI_MODE(RENDER_RING_BASE), ASYNC_FLIP_PERF_DISABLE);
 
        /* WaDisablePartialInstShootdown:bdw,chv */
-       wa_masked_en(wal, GEN8_ROW_CHICKEN,
-                    PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE);
+       wa_mcr_masked_en(wal, GEN8_ROW_CHICKEN,
+                        PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE);
 
        /* Use Force Non-Coherent whenever executing a 3D context. This is a
         * workaround for a possible hang in the unlikely event a TLB
@@ -288,18 +340,18 @@ static void bdw_ctx_workarounds_init(struct intel_engine_cs *engine,
        gen8_ctx_workarounds_init(engine, wal);
 
        /* WaDisableThreadStallDopClockGating:bdw (pre-production) */
-       wa_masked_en(wal, GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE);
+       wa_mcr_masked_en(wal, GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE);
 
        /* WaDisableDopClockGating:bdw
         *
         * Also see the related UCGTCL1 write in bdw_init_clock_gating()
         * to disable EUTC clock gating.
         */
-       wa_masked_en(wal, GEN7_ROW_CHICKEN2,
-                    DOP_CLOCK_GATING_DISABLE);
+       wa_mcr_masked_en(wal, GEN8_ROW_CHICKEN2,
+                        DOP_CLOCK_GATING_DISABLE);
 
-       wa_masked_en(wal, HALF_SLICE_CHICKEN3,
-                    GEN8_SAMPLER_POWER_BYPASS_DIS);
+       wa_mcr_masked_en(wal, GEN8_HALF_SLICE_CHICKEN3,
+                        GEN8_SAMPLER_POWER_BYPASS_DIS);
 
        wa_masked_en(wal, HDC_CHICKEN0,
                     /* WaForceContextSaveRestoreNonCoherent:bdw */
@@ -314,7 +366,7 @@ static void chv_ctx_workarounds_init(struct intel_engine_cs *engine,
        gen8_ctx_workarounds_init(engine, wal);
 
        /* WaDisableThreadStallDopClockGating:chv */
-       wa_masked_en(wal, GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE);
+       wa_mcr_masked_en(wal, GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE);
 
        /* Improve HiZ throughput on CHV. */
        wa_masked_en(wal, HIZ_CHICKEN, CHV_HZ_8X8_MODE_IN_1X);
@@ -333,21 +385,21 @@ static void gen9_ctx_workarounds_init(struct intel_engine_cs *engine,
                 */
                wa_masked_en(wal, COMMON_SLICE_CHICKEN2,
                             GEN9_PBE_COMPRESSED_HASH_SELECTION);
-               wa_masked_en(wal, GEN9_HALF_SLICE_CHICKEN7,
-                            GEN9_SAMPLER_HASH_COMPRESSED_READ_ADDR);
+               wa_mcr_masked_en(wal, GEN9_HALF_SLICE_CHICKEN7,
+                                GEN9_SAMPLER_HASH_COMPRESSED_READ_ADDR);
        }
 
        /* WaClearFlowControlGpgpuContextSave:skl,bxt,kbl,glk,cfl */
        /* WaDisablePartialInstShootdown:skl,bxt,kbl,glk,cfl */
-       wa_masked_en(wal, GEN8_ROW_CHICKEN,
-                    FLOW_CONTROL_ENABLE |
-                    PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE);
+       wa_mcr_masked_en(wal, GEN8_ROW_CHICKEN,
+                        FLOW_CONTROL_ENABLE |
+                        PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE);
 
        /* WaEnableYV12BugFixInHalfSliceChicken7:skl,bxt,kbl,glk,cfl */
        /* WaEnableSamplerGPGPUPreemptionSupport:skl,bxt,kbl,cfl */
-       wa_masked_en(wal, GEN9_HALF_SLICE_CHICKEN7,
-                    GEN9_ENABLE_YV12_BUGFIX |
-                    GEN9_ENABLE_GPGPU_PREEMPTION);
+       wa_mcr_masked_en(wal, GEN9_HALF_SLICE_CHICKEN7,
+                        GEN9_ENABLE_YV12_BUGFIX |
+                        GEN9_ENABLE_GPGPU_PREEMPTION);
 
        /* Wa4x4STCOptimizationDisable:skl,bxt,kbl,glk,cfl */
        /* WaDisablePartialResolveInVc:skl,bxt,kbl,cfl */
@@ -356,8 +408,8 @@ static void gen9_ctx_workarounds_init(struct intel_engine_cs *engine,
                     GEN9_PARTIAL_RESOLVE_IN_VC_DISABLE);
 
        /* WaCcsTlbPrefetchDisable:skl,bxt,kbl,glk,cfl */
-       wa_masked_dis(wal, GEN9_HALF_SLICE_CHICKEN5,
-                     GEN9_CCS_TLB_PREFETCH_ENABLE);
+       wa_mcr_masked_dis(wal, GEN9_HALF_SLICE_CHICKEN5,
+                         GEN9_CCS_TLB_PREFETCH_ENABLE);
 
        /* WaForceContextSaveRestoreNonCoherent:skl,bxt,kbl,cfl */
        wa_masked_en(wal, HDC_CHICKEN0,
@@ -386,11 +438,11 @@ static void gen9_ctx_workarounds_init(struct intel_engine_cs *engine,
            IS_KABYLAKE(i915) ||
            IS_COFFEELAKE(i915) ||
            IS_COMETLAKE(i915))
-               wa_masked_en(wal, HALF_SLICE_CHICKEN3,
-                            GEN8_SAMPLER_POWER_BYPASS_DIS);
+               wa_mcr_masked_en(wal, GEN8_HALF_SLICE_CHICKEN3,
+                                GEN8_SAMPLER_POWER_BYPASS_DIS);
 
        /* WaDisableSTUnitPowerOptimization:skl,bxt,kbl,glk,cfl */
-       wa_masked_en(wal, HALF_SLICE_CHICKEN2, GEN8_ST_PO_DISABLE);
+       wa_mcr_masked_en(wal, HALF_SLICE_CHICKEN2, GEN8_ST_PO_DISABLE);
 
        /*
         * Supporting preemption with fine-granularity requires changes in the
@@ -469,8 +521,8 @@ static void bxt_ctx_workarounds_init(struct intel_engine_cs *engine,
        gen9_ctx_workarounds_init(engine, wal);
 
        /* WaDisableThreadStallDopClockGating:bxt */
-       wa_masked_en(wal, GEN8_ROW_CHICKEN,
-                    STALL_DOP_GATING_DISABLE);
+       wa_mcr_masked_en(wal, GEN8_ROW_CHICKEN,
+                        STALL_DOP_GATING_DISABLE);
 
        /* WaToEnableHwFixForPushConstHWBug:bxt */
        wa_masked_en(wal, COMMON_SLICE_CHICKEN2,
@@ -490,8 +542,8 @@ static void kbl_ctx_workarounds_init(struct intel_engine_cs *engine,
                             GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
 
        /* WaDisableSbeCacheDispatchPortSharing:kbl */
-       wa_masked_en(wal, GEN7_HALF_SLICE_CHICKEN1,
-                    GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE);
+       wa_mcr_masked_en(wal, GEN8_HALF_SLICE_CHICKEN1,
+                        GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE);
 }
 
 static void glk_ctx_workarounds_init(struct intel_engine_cs *engine,
@@ -514,8 +566,8 @@ static void cfl_ctx_workarounds_init(struct intel_engine_cs *engine,
                     GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
 
        /* WaDisableSbeCacheDispatchPortSharing:cfl */
-       wa_masked_en(wal, GEN7_HALF_SLICE_CHICKEN1,
-                    GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE);
+       wa_mcr_masked_en(wal, GEN8_HALF_SLICE_CHICKEN1,
+                        GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE);
 }
 
 static void icl_ctx_workarounds_init(struct intel_engine_cs *engine,
@@ -534,13 +586,13 @@ static void icl_ctx_workarounds_init(struct intel_engine_cs *engine,
         * (the register is whitelisted in hardware now, so UMDs can opt in
         * for coherency if they have a good reason).
         */
-       wa_masked_en(wal, ICL_HDC_MODE, HDC_FORCE_NON_COHERENT);
+       wa_mcr_masked_en(wal, ICL_HDC_MODE, HDC_FORCE_NON_COHERENT);
 
        /* WaEnableFloatBlendOptimization:icl */
-       wa_add(wal, GEN10_CACHE_MODE_SS, 0,
-              _MASKED_BIT_ENABLE(FLOAT_BLEND_OPTIMIZATION_ENABLE),
-              0 /* write-only, so skip validation */,
-              true);
+       wa_mcr_add(wal, GEN10_CACHE_MODE_SS, 0,
+                  _MASKED_BIT_ENABLE(FLOAT_BLEND_OPTIMIZATION_ENABLE),
+                  0 /* write-only, so skip validation */,
+                  true);
 
        /* WaDisableGPGPUMidThreadPreemption:icl */
        wa_masked_field_set(wal, GEN8_CS_CHICKEN1,
@@ -548,8 +600,8 @@ static void icl_ctx_workarounds_init(struct intel_engine_cs *engine,
                            GEN9_PREEMPT_GPGPU_THREAD_GROUP_LEVEL);
 
        /* allow headerless messages for preemptible GPGPU context */
-       wa_masked_en(wal, GEN10_SAMPLER_MODE,
-                    GEN11_SAMPLER_ENABLE_HEADLESS_MSG);
+       wa_mcr_masked_en(wal, GEN10_SAMPLER_MODE,
+                        GEN11_SAMPLER_ENABLE_HEADLESS_MSG);
 
        /* Wa_1604278689:icl,ehl */
        wa_write(wal, IVB_FBC_RT_BASE, 0xFFFFFFFF & ~ILK_FBC_RT_VALID);
@@ -558,7 +610,7 @@ static void icl_ctx_workarounds_init(struct intel_engine_cs *engine,
                         0xFFFFFFFF);
 
        /* Wa_1406306137:icl,ehl */
-       wa_masked_en(wal, GEN9_ROW_CHICKEN4, GEN11_DIS_PICK_2ND_EU);
+       wa_mcr_masked_en(wal, GEN9_ROW_CHICKEN4, GEN11_DIS_PICK_2ND_EU);
 }
 
 /*
@@ -569,13 +621,13 @@ static void dg2_ctx_gt_tuning_init(struct intel_engine_cs *engine,
                                   struct i915_wa_list *wal)
 {
        wa_masked_en(wal, CHICKEN_RASTER_2, TBIMR_FAST_CLIP);
-       wa_write_clr_set(wal, GEN11_L3SQCREG5, L3_PWM_TIMER_INIT_VAL_MASK,
-                        REG_FIELD_PREP(L3_PWM_TIMER_INIT_VAL_MASK, 0x7f));
-       wa_add(wal,
-              FF_MODE2,
-              FF_MODE2_TDS_TIMER_MASK,
-              FF_MODE2_TDS_TIMER_128,
-              0, false);
+       wa_mcr_write_clr_set(wal, XEHP_L3SQCREG5, L3_PWM_TIMER_INIT_VAL_MASK,
+                            REG_FIELD_PREP(L3_PWM_TIMER_INIT_VAL_MASK, 0x7f));
+       wa_mcr_add(wal,
+                  XEHP_FF_MODE2,
+                  FF_MODE2_TDS_TIMER_MASK,
+                  FF_MODE2_TDS_TIMER_128,
+                  0, false);
 }
 
 /*
@@ -599,7 +651,7 @@ static void gen12_ctx_gt_tuning_init(struct intel_engine_cs *engine,
         * verification is ignored.
         */
        wa_add(wal,
-              FF_MODE2,
+              GEN12_FF_MODE2,
               FF_MODE2_TDS_TIMER_MASK,
               FF_MODE2_TDS_TIMER_128,
               0, false);
@@ -608,6 +660,8 @@ static void gen12_ctx_gt_tuning_init(struct intel_engine_cs *engine,
 static void gen12_ctx_workarounds_init(struct intel_engine_cs *engine,
                                       struct i915_wa_list *wal)
 {
+       struct drm_i915_private *i915 = engine->i915;
+
        gen12_ctx_gt_tuning_init(engine, wal);
 
        /*
@@ -637,10 +691,14 @@ static void gen12_ctx_workarounds_init(struct intel_engine_cs *engine,
         * to Wa_1608008084.
         */
        wa_add(wal,
-              FF_MODE2,
+              GEN12_FF_MODE2,
               FF_MODE2_GS_TIMER_MASK,
               FF_MODE2_GS_TIMER_224,
               0, false);
+
+       if (!IS_DG1(i915))
+               /* Wa_1806527549 */
+               wa_masked_en(wal, HIZ_CHICKEN, HZ_DEPTH_TEST_LE_GE_OPT_DISABLE);
 }
 
 static void dg1_ctx_workarounds_init(struct intel_engine_cs *engine,
@@ -664,27 +722,27 @@ static void dg2_ctx_workarounds_init(struct intel_engine_cs *engine,
 
        /* Wa_16011186671:dg2_g11 */
        if (IS_DG2_GRAPHICS_STEP(engine->i915, G11, STEP_A0, STEP_B0)) {
-               wa_masked_dis(wal, VFLSKPD, DIS_MULT_MISS_RD_SQUASH);
-               wa_masked_en(wal, VFLSKPD, DIS_OVER_FETCH_CACHE);
+               wa_mcr_masked_dis(wal, VFLSKPD, DIS_MULT_MISS_RD_SQUASH);
+               wa_mcr_masked_en(wal, VFLSKPD, DIS_OVER_FETCH_CACHE);
        }
 
        if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_A0, STEP_B0)) {
                /* Wa_14010469329:dg2_g10 */
-               wa_masked_en(wal, GEN11_COMMON_SLICE_CHICKEN3,
-                            XEHP_DUAL_SIMD8_SEQ_MERGE_DISABLE);
+               wa_mcr_masked_en(wal, XEHP_COMMON_SLICE_CHICKEN3,
+                                XEHP_DUAL_SIMD8_SEQ_MERGE_DISABLE);
 
                /*
                 * Wa_22010465075:dg2_g10
                 * Wa_22010613112:dg2_g10
                 * Wa_14010698770:dg2_g10
                 */
-               wa_masked_en(wal, GEN11_COMMON_SLICE_CHICKEN3,
-                            GEN12_DISABLE_CPS_AWARE_COLOR_PIPE);
+               wa_mcr_masked_en(wal, XEHP_COMMON_SLICE_CHICKEN3,
+                                GEN12_DISABLE_CPS_AWARE_COLOR_PIPE);
        }
 
        /* Wa_16013271637:dg2 */
-       wa_masked_en(wal, SLICE_COMMON_ECO_CHICKEN1,
-                    MSC_MSAA_REODER_BUF_BYPASS_DISABLE);
+       wa_mcr_masked_en(wal, XEHP_SLICE_COMMON_ECO_CHICKEN1,
+                        MSC_MSAA_REODER_BUF_BYPASS_DISABLE);
 
        /* Wa_14014947963:dg2 */
        if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_B0, STEP_FOREVER) ||
@@ -1076,18 +1134,23 @@ static void __set_mcr_steering(struct i915_wa_list *wal,
        wa_write_clr_set(wal, steering_reg, mcr_mask, mcr);
 }
 
-static void __add_mcr_wa(struct intel_gt *gt, struct i915_wa_list *wal,
-                        unsigned int slice, unsigned int subslice)
+static void debug_dump_steering(struct intel_gt *gt)
 {
        struct drm_printer p = drm_debug_printer("MCR Steering:");
 
+       if (drm_debug_enabled(DRM_UT_DRIVER))
+               intel_gt_mcr_report_steering(&p, gt, false);
+}
+
+static void __add_mcr_wa(struct intel_gt *gt, struct i915_wa_list *wal,
+                        unsigned int slice, unsigned int subslice)
+{
        __set_mcr_steering(wal, GEN8_MCR_SELECTOR, slice, subslice);
 
        gt->default_steering.groupid = slice;
        gt->default_steering.instanceid = subslice;
 
-       if (drm_debug_enabled(DRM_UT_DRIVER))
-               intel_gt_mcr_report_steering(&p, gt, false);
+       debug_dump_steering(gt);
 }
 
 static void
@@ -1181,6 +1244,9 @@ xehp_init_mcr(struct intel_gt *gt, struct i915_wa_list *wal)
                gt->steering_table[MSLICE] = NULL;
        }
 
+       if (IS_XEHPSDV(gt->i915) && slice_mask & BIT(0))
+               gt->steering_table[GAM] = NULL;
+
        slice = __ffs(slice_mask);
        subslice = intel_sseu_find_first_xehp_dss(sseu, GEN_DSS_PER_GSLICE, slice) %
                GEN_DSS_PER_GSLICE;
@@ -1198,6 +1264,13 @@ xehp_init_mcr(struct intel_gt *gt, struct i915_wa_list *wal)
         */
        __set_mcr_steering(wal, MCFG_MCR_SELECTOR, 0, 2);
        __set_mcr_steering(wal, SF_MCR_SELECTOR, 0, 2);
+
+       /*
+        * On DG2, GAM registers have a dedicated steering control register
+        * and must always be programmed to a hardcoded groupid of "1."
+        */
+       if (IS_DG2(gt->i915))
+               __set_mcr_steering(wal, GAM_MCR_SELECTOR, 1, 0);
 }
 
 static void
@@ -1254,22 +1327,22 @@ icl_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
                    PSDUNIT_CLKGATE_DIS);
 
        /* Wa_1406680159:icl,ehl */
-       wa_write_or(wal,
-                   SUBSLICE_UNIT_LEVEL_CLKGATE,
-                   GWUNIT_CLKGATE_DIS);
+       wa_mcr_write_or(wal,
+                       GEN11_SUBSLICE_UNIT_LEVEL_CLKGATE,
+                       GWUNIT_CLKGATE_DIS);
 
        /* Wa_1607087056:icl,ehl,jsl */
        if (IS_ICELAKE(i915) ||
            IS_JSL_EHL_GRAPHICS_STEP(i915, STEP_A0, STEP_B0))
                wa_write_or(wal,
-                           SLICE_UNIT_LEVEL_CLKGATE,
+                           GEN11_SLICE_UNIT_LEVEL_CLKGATE,
                            L3_CLKGATE_DIS | L3_CR2X_CLKGATE_DIS);
 
        /*
         * This is not a documented workaround, but rather an optimization
         * to reduce sampler power.
         */
-       wa_write_clr(wal, GEN10_DFR_RATIO_EN_AND_CHICKEN, DFR_DISABLE);
+       wa_mcr_write_clr(wal, GEN10_DFR_RATIO_EN_AND_CHICKEN, DFR_DISABLE);
 }
 
 /*
@@ -1303,7 +1376,7 @@ gen12_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
        wa_14011060649(gt, wal);
 
        /* Wa_14011059788:tgl,rkl,adl-s,dg1,adl-p */
-       wa_write_or(wal, GEN10_DFR_RATIO_EN_AND_CHICKEN, DFR_DISABLE);
+       wa_mcr_write_or(wal, GEN10_DFR_RATIO_EN_AND_CHICKEN, DFR_DISABLE);
 }
 
 static void
@@ -1315,14 +1388,14 @@ tgl_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
 
        /* Wa_1409420604:tgl */
        if (IS_TGL_UY_GRAPHICS_STEP(i915, STEP_A0, STEP_B0))
-               wa_write_or(wal,
-                           SUBSLICE_UNIT_LEVEL_CLKGATE2,
-                           CPSSUNIT_CLKGATE_DIS);
+               wa_mcr_write_or(wal,
+                               SUBSLICE_UNIT_LEVEL_CLKGATE2,
+                               CPSSUNIT_CLKGATE_DIS);
 
        /* Wa_1607087056:tgl also known as BUG:1409180338 */

        if (IS_TGL_UY_GRAPHICS_STEP(i915, STEP_A0, STEP_B0))
                wa_write_or(wal,
-                           SLICE_UNIT_LEVEL_CLKGATE,
+                           GEN11_SLICE_UNIT_LEVEL_CLKGATE,
                            L3_CLKGATE_DIS | L3_CR2X_CLKGATE_DIS);
 
        /* Wa_1408615072:tgl[a0] */
@@ -1341,14 +1414,14 @@ dg1_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
        /* Wa_1607087056:dg1 */
        if (IS_DG1_GRAPHICS_STEP(i915, STEP_A0, STEP_B0))
                wa_write_or(wal,
-                           SLICE_UNIT_LEVEL_CLKGATE,
+                           GEN11_SLICE_UNIT_LEVEL_CLKGATE,
                            L3_CLKGATE_DIS | L3_CR2X_CLKGATE_DIS);
 
        /* Wa_1409420604:dg1 */
        if (IS_DG1(i915))
-               wa_write_or(wal,
-                           SUBSLICE_UNIT_LEVEL_CLKGATE2,
-                           CPSSUNIT_CLKGATE_DIS);
+               wa_mcr_write_or(wal,
+                               SUBSLICE_UNIT_LEVEL_CLKGATE2,
+                               CPSSUNIT_CLKGATE_DIS);
 
        /* Wa_1408615072:dg1 */
        /* Empirical testing shows this register is unaffected by engine reset. */
@@ -1365,7 +1438,7 @@ xehpsdv_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
        xehp_init_mcr(gt, wal);
 
        /* Wa_1409757795:xehpsdv */
-       wa_write_or(wal, SCCGCTL94DC, CG3DDISURB);
+       wa_mcr_write_or(wal, SCCGCTL94DC, CG3DDISURB);
 
        /* Wa_16011155590:xehpsdv */
        if (IS_XEHPSDV_GRAPHICS_STEP(i915, STEP_A0, STEP_B0))
@@ -1445,8 +1518,8 @@ dg2_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
                            CG3DDISCFEG_CLKGATE_DIS);
 
                /* Wa_14011006942:dg2 */
-               wa_write_or(wal, SUBSLICE_UNIT_LEVEL_CLKGATE,
-                           DSS_ROUTER_CLKGATE_DIS);
+               wa_mcr_write_or(wal, GEN11_SUBSLICE_UNIT_LEVEL_CLKGATE,
+                               DSS_ROUTER_CLKGATE_DIS);
        }
 
        if (IS_DG2_GRAPHICS_STEP(gt->i915, G10, STEP_A0, STEP_B0)) {
@@ -1457,7 +1530,7 @@ dg2_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
                wa_write_or(wal, UNSLCGCTL9444, LTCDD_CLKGATE_DIS);
 
                /* Wa_14011371254:dg2_g10 */
-               wa_write_or(wal, SLICE_UNIT_LEVEL_CLKGATE, NODEDSS_CLKGATE_DIS);
+               wa_mcr_write_or(wal, XEHP_SLICE_UNIT_LEVEL_CLKGATE, NODEDSS_CLKGATE_DIS);
 
                /* Wa_14011431319:dg2_g10 */
                wa_write_or(wal, UNSLCGCTL9440, GAMTLBOACS_CLKGATE_DIS |
@@ -1493,21 +1566,21 @@ dg2_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
                            GAMEDIA_CLKGATE_DIS);
 
                /* Wa_14011028019:dg2_g10 */
-               wa_write_or(wal, SSMCGCTL9530, RTFUNIT_CLKGATE_DIS);
+               wa_mcr_write_or(wal, SSMCGCTL9530, RTFUNIT_CLKGATE_DIS);
        }
 
        /* Wa_14014830051:dg2 */
-       wa_write_clr(wal, SARB_CHICKEN1, COMP_CKN_IN);
+       wa_mcr_write_clr(wal, SARB_CHICKEN1, COMP_CKN_IN);
 
        /*
         * The following are not actually "workarounds" but rather
         * recommended tuning settings documented in the bspec's
         * performance guide section.
         */
-       wa_write_or(wal, GEN12_SQCM, EN_32B_ACCESS);
+       wa_mcr_write_or(wal, XEHP_SQCM, EN_32B_ACCESS);
 
        /* Wa_14015795083 */
-       wa_write_clr(wal, GEN7_MISCCPCTL, GEN12_DOP_CLOCK_GATE_RENDER_ENABLE);
+       wa_mcr_write_clr(wal, GEN8_MISCCPCTL, GEN12_DOP_CLOCK_GATE_RENDER_ENABLE);
 }
 
 static void
@@ -1516,7 +1589,27 @@ pvc_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
        pvc_init_mcr(gt, wal);
 
        /* Wa_14015795083 */
-       wa_write_clr(wal, GEN7_MISCCPCTL, GEN12_DOP_CLOCK_GATE_RENDER_ENABLE);
+       wa_mcr_write_clr(wal, GEN8_MISCCPCTL, GEN12_DOP_CLOCK_GATE_RENDER_ENABLE);
+}
+
+static void
+xelpg_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
+{
+       /* FIXME: Actual workarounds will be added in future patch(es) */
+
+       /*
+        * Unlike older platforms, we no longer setup implicit steering here;
+        * all MCR accesses are explicitly steered.
+        */
+       debug_dump_steering(gt);
+}
+
+static void
+xelpmp_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
+{
+       /* FIXME: Actual workarounds will be added in future patch(es) */
+
+       debug_dump_steering(gt);
 }
 
 static void
@@ -1524,7 +1617,18 @@ gt_init_workarounds(struct intel_gt *gt, struct i915_wa_list *wal)
 {
        struct drm_i915_private *i915 = gt->i915;
 
-       if (IS_PONTEVECCHIO(i915))
+       if (gt->type == GT_MEDIA) {
+               if (MEDIA_VER(i915) >= 13)
+                       xelpmp_gt_workarounds_init(gt, wal);
+               else
+                       MISSING_CASE(MEDIA_VER(i915));
+
+               return;
+       }
+
+       if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70))
+               xelpg_gt_workarounds_init(gt, wal);
+       else if (IS_PONTEVECCHIO(i915))
                pvc_gt_workarounds_init(gt, wal);
        else if (IS_DG2(i915))
                dg2_gt_workarounds_init(gt, wal);
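
gt_init_workarounds() now branches first on gt->type, so a standalone media GT (MEDIA_VER 13+, the new Xe_LPM+ path) gets its own init, and then on the full IP version, with GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70) selecting the new Xe_LPG path. A sketch of the version packing such a comparison relies on; the (major << 8 | release) layout is assumed here rather than quoted from the patch, and the returned strings are purely illustrative:

#include <stdio.h>

/* Assumed packing: IP major version in the high byte, release in the low
 * byte, so plain integer comparison orders 12.55 < 12.70 correctly. */
#define IP_VER(ver, rel) (((ver) << 8) | (rel))

static const char *pick_gt_wa_path(int full_ver)
{
	if (full_ver >= IP_VER(12, 70))
		return "Xe_LPG path (xelpg_gt_workarounds_init)";
	if (full_ver >= IP_VER(12, 50))
		return "Xe_HP-era path";
	return "gen12 path";
}

int main(void)
{
	printf("12.55 -> %s\n", pick_gt_wa_path(IP_VER(12, 55)));
	printf("12.70 -> %s\n", pick_gt_wa_path(IP_VER(12, 70)));
	printf("12.71 -> %s\n", pick_gt_wa_path(IP_VER(12, 71)));
	return 0;
}
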
@@ -1628,14 +1732,25 @@ wa_list_apply(struct intel_gt *gt, const struct i915_wa_list *wal)
                u32 val, old = 0;
 
                /* open-coded rmw due to steering */
-               old = wa->clr ? intel_gt_mcr_read_any_fw(gt, wa->reg) : 0;
+               if (wa->clr)
+                       old = wa->is_mcr ?
+                               intel_gt_mcr_read_any_fw(gt, wa->mcr_reg) :
+                               intel_uncore_read_fw(uncore, wa->reg);
                val = (old & ~wa->clr) | wa->set;
-               if (val != old || !wa->clr)
-                       intel_uncore_write_fw(uncore, wa->reg, val);
+               if (val != old || !wa->clr) {
+                       if (wa->is_mcr)
+                               intel_gt_mcr_multicast_write_fw(gt, wa->mcr_reg, val);
+                       else
+                               intel_uncore_write_fw(uncore, wa->reg, val);
+               }
+
+               if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) {
+                       u32 val = wa->is_mcr ?
+                               intel_gt_mcr_read_any_fw(gt, wa->mcr_reg) :
+                               intel_uncore_read_fw(uncore, wa->reg);
 
-               if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
-                       wa_verify(wa, intel_gt_mcr_read_any_fw(gt, wa->reg),
-                                 wal->name, "application");
+                       wa_verify(wa, val, wal->name, "application");
+               }
        }
 
        intel_uncore_forcewake_put__locked(uncore, fw);
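
The reworked wa_list_apply() loop reads back through either the steered MCR path or plain MMIO depending on wa->is_mcr, but the arithmetic is unchanged: compute (old & ~clr) | set, write the result unless a pure read-modify-write would leave the register untouched, and (under CONFIG_DRM_I915_DEBUG_GEM) re-read and verify only the bits named in wa->read. A condensed user-space sketch of that apply/verify cycle against a fake register:

#include <stdint.h>
#include <stdio.h>

struct wa {
	uint32_t clr;    /* bits to clear */
	uint32_t set;    /* bits to set */
	uint32_t read;   /* bits that must read back as programmed */
};

static uint32_t fake_reg;

static void apply_wa(const struct wa *wa)
{
	uint32_t old = wa->clr ? fake_reg : 0;
	uint32_t val = (old & ~wa->clr) | wa->set;

	/* Skip the write only when the rmw would leave the value unchanged. */
	if (val != old || !wa->clr)
		fake_reg = val;
}

/* Verification only checks the bits the workaround actually cares about. */
static int verify_wa(const struct wa *wa)
{
	return ((fake_reg ^ wa->set) & wa->read) == 0;
}

int main(void)
{
	struct wa wa = { .clr = 0x0000000f, .set = 0x00000005, .read = 0x0000000f };

	fake_reg = 0x12345678;
	apply_wa(&wa);
	printf("reg=%#010x verified=%d\n", fake_reg, verify_wa(&wa));
	return 0;
}
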
@@ -1664,8 +1779,9 @@ static bool wa_list_verify(struct intel_gt *gt,
        intel_uncore_forcewake_get__locked(uncore, fw);
 
        for (i = 0, wa = wal->list; i < wal->count; i++, wa++)
-               ok &= wa_verify(wa,
-                               intel_gt_mcr_read_any_fw(gt, wa->reg),
+               ok &= wa_verify(wa, wa->is_mcr ?
+                               intel_gt_mcr_read_any_fw(gt, wa->mcr_reg) :
+                               intel_uncore_read_fw(uncore, wa->reg),
                                wal->name, from);
 
        intel_uncore_forcewake_put__locked(uncore, fw);
@@ -1711,12 +1827,36 @@ whitelist_reg_ext(struct i915_wa_list *wal, i915_reg_t reg, u32 flags)
        _wa_add(wal, &wa);
 }
 
+static void
+whitelist_mcr_reg_ext(struct i915_wa_list *wal, i915_mcr_reg_t reg, u32 flags)
+{
+       struct i915_wa wa = {
+               .mcr_reg = reg,
+               .is_mcr = 1,
+       };
+
+       if (GEM_DEBUG_WARN_ON(wal->count >= RING_MAX_NONPRIV_SLOTS))
+               return;
+
+       if (GEM_DEBUG_WARN_ON(!is_nonpriv_flags_valid(flags)))
+               return;
+
+       wa.mcr_reg.reg |= flags;
+       _wa_add(wal, &wa);
+}
+
 static void
 whitelist_reg(struct i915_wa_list *wal, i915_reg_t reg)
 {
        whitelist_reg_ext(wal, reg, RING_FORCE_TO_NONPRIV_ACCESS_RW);
 }
 
+static void
+whitelist_mcr_reg(struct i915_wa_list *wal, i915_mcr_reg_t reg)
+{
+       whitelist_mcr_reg_ext(wal, reg, RING_FORCE_TO_NONPRIV_ACCESS_RW);
+}
+
 static void gen9_whitelist_build(struct i915_wa_list *w)
 {
        /* WaVFEStateAfterPipeControlwithMediaStateClear:skl,bxt,glk,cfl */
@@ -1742,7 +1882,7 @@ static void skl_whitelist_build(struct intel_engine_cs *engine)
        gen9_whitelist_build(w);
 
        /* WaDisableLSQCROPERFforOCL:skl */
-       whitelist_reg(w, GEN8_L3SQCREG4);
+       whitelist_mcr_reg(w, GEN8_L3SQCREG4);
 }
 
 static void bxt_whitelist_build(struct intel_engine_cs *engine)
@@ -1763,7 +1903,7 @@ static void kbl_whitelist_build(struct intel_engine_cs *engine)
        gen9_whitelist_build(w);
 
        /* WaDisableLSQCROPERFforOCL:kbl */
-       whitelist_reg(w, GEN8_L3SQCREG4);
+       whitelist_mcr_reg(w, GEN8_L3SQCREG4);
 }
 
 static void glk_whitelist_build(struct intel_engine_cs *engine)
@@ -1828,10 +1968,10 @@ static void icl_whitelist_build(struct intel_engine_cs *engine)
        switch (engine->class) {
        case RENDER_CLASS:
                /* WaAllowUMDToModifyHalfSliceChicken7:icl */
-               whitelist_reg(w, GEN9_HALF_SLICE_CHICKEN7);
+               whitelist_mcr_reg(w, GEN9_HALF_SLICE_CHICKEN7);
 
                /* WaAllowUMDToModifySamplerMode:icl */
-               whitelist_reg(w, GEN10_SAMPLER_MODE);
+               whitelist_mcr_reg(w, GEN10_SAMPLER_MODE);
 
                /* WaEnableStateCacheRedirectToCS:icl */
                whitelist_reg(w, GEN9_SLICE_COMMON_ECO_CHICKEN1);
@@ -2107,24 +2247,21 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
 
        if (IS_DG2_GRAPHICS_STEP(i915, G11, STEP_A0, STEP_B0)) {
                /* Wa_14013392000:dg2_g11 */
-               wa_masked_en(wal, GEN7_ROW_CHICKEN2, GEN12_ENABLE_LARGE_GRF_MODE);
-
-               /* Wa_16011620976:dg2_g11 */
-               wa_write_or(wal, LSC_CHICKEN_BIT_0_UDW, DIS_CHAIN_2XSIMD8);
+               wa_mcr_masked_en(wal, GEN8_ROW_CHICKEN2, GEN12_ENABLE_LARGE_GRF_MODE);
        }
 
        if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_B0, STEP_FOREVER) ||
            IS_DG2_G11(i915) || IS_DG2_G12(i915)) {
                /* Wa_1509727124:dg2 */
-               wa_masked_en(wal, GEN10_SAMPLER_MODE,
-                            SC_DISABLE_POWER_OPTIMIZATION_EBB);
+               wa_mcr_masked_en(wal, GEN10_SAMPLER_MODE,
+                                SC_DISABLE_POWER_OPTIMIZATION_EBB);
        }
 
        if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_A0, STEP_B0) ||
            IS_DG2_GRAPHICS_STEP(i915, G11, STEP_A0, STEP_B0)) {
                /* Wa_14012419201:dg2 */
-               wa_masked_en(wal, GEN9_ROW_CHICKEN4,
-                            GEN12_DISABLE_HDR_PAST_PAYLOAD_HOLD_FIX);
+               wa_mcr_masked_en(wal, GEN9_ROW_CHICKEN4,
+                                GEN12_DISABLE_HDR_PAST_PAYLOAD_HOLD_FIX);
        }
 
        if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_B0, STEP_C0) ||
@@ -2133,13 +2270,13 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
                 * Wa_22012826095:dg2
                 * Wa_22013059131:dg2
                 */
-               wa_write_clr_set(wal, LSC_CHICKEN_BIT_0_UDW,
-                                MAXREQS_PER_BANK,
-                                REG_FIELD_PREP(MAXREQS_PER_BANK, 2));
+               wa_mcr_write_clr_set(wal, LSC_CHICKEN_BIT_0_UDW,
+                                    MAXREQS_PER_BANK,
+                                    REG_FIELD_PREP(MAXREQS_PER_BANK, 2));
 
                /* Wa_22013059131:dg2 */
-               wa_write_or(wal, LSC_CHICKEN_BIT_0,
-                           FORCE_1_SUB_MESSAGE_PER_FRAGMENT);
+               wa_mcr_write_or(wal, LSC_CHICKEN_BIT_0,
+                               FORCE_1_SUB_MESSAGE_PER_FRAGMENT);
        }
 
        /* Wa_1308578152:dg2_g10 when first gslice is fused off */
@@ -2152,19 +2289,19 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
        if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_B0, STEP_FOREVER) ||
            IS_DG2_G11(i915) || IS_DG2_G12(i915)) {
                /* Wa_22013037850:dg2 */
-               wa_write_or(wal, LSC_CHICKEN_BIT_0_UDW,
-                           DISABLE_128B_EVICTION_COMMAND_UDW);
+               wa_mcr_write_or(wal, LSC_CHICKEN_BIT_0_UDW,
+                               DISABLE_128B_EVICTION_COMMAND_UDW);
 
                /* Wa_22012856258:dg2 */
-               wa_masked_en(wal, GEN7_ROW_CHICKEN2,
-                            GEN12_DISABLE_READ_SUPPRESSION);
+               wa_mcr_masked_en(wal, GEN8_ROW_CHICKEN2,
+                                GEN12_DISABLE_READ_SUPPRESSION);
 
                /*
                 * Wa_22010960976:dg2
                 * Wa_14013347512:dg2
                 */
-               wa_masked_dis(wal, GEN12_HDC_CHICKEN0,
-                             LSC_L1_FLUSH_CTL_3D_DATAPORT_FLUSH_EVENTS_MASK);
+               wa_mcr_masked_dis(wal, XEHP_HDC_CHICKEN0,
+                                 LSC_L1_FLUSH_CTL_3D_DATAPORT_FLUSH_EVENTS_MASK);
        }
 
        if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_A0, STEP_B0)) {
@@ -2172,8 +2309,8 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
                 * Wa_1608949956:dg2_g10
                 * Wa_14010198302:dg2_g10
                 */
-               wa_masked_en(wal, GEN8_ROW_CHICKEN,
-                            MDQ_ARBITRATION_MODE | UGM_BACKUP_MODE);
+               wa_mcr_masked_en(wal, GEN8_ROW_CHICKEN,
+                                MDQ_ARBITRATION_MODE | UGM_BACKUP_MODE);
 
                /*
                 * Wa_14010918519:dg2_g10
@@ -2181,31 +2318,31 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
                 * LSC_CHICKEN_BIT_0 always reads back as 0 in this stepping,
                 * so ignoring verification.
                 */
-               wa_add(wal, LSC_CHICKEN_BIT_0_UDW, 0,
-                      FORCE_SLM_FENCE_SCOPE_TO_TILE | FORCE_UGM_FENCE_SCOPE_TO_TILE,
-                      0, false);
+               wa_mcr_add(wal, LSC_CHICKEN_BIT_0_UDW, 0,
+                          FORCE_SLM_FENCE_SCOPE_TO_TILE | FORCE_UGM_FENCE_SCOPE_TO_TILE,
+                          0, false);
        }
 
        if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_A0, STEP_B0)) {
                /* Wa_22010430635:dg2 */
-               wa_masked_en(wal,
-                            GEN9_ROW_CHICKEN4,
-                            GEN12_DISABLE_GRF_CLEAR);
+               wa_mcr_masked_en(wal,
+                                GEN9_ROW_CHICKEN4,
+                                GEN12_DISABLE_GRF_CLEAR);
 
                /* Wa_14010648519:dg2 */
-               wa_write_or(wal, XEHP_L3NODEARBCFG, XEHP_LNESPARE);
+               wa_mcr_write_or(wal, XEHP_L3NODEARBCFG, XEHP_LNESPARE);
        }
 
        /* Wa_14013202645:dg2 */
        if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_B0, STEP_C0) ||
            IS_DG2_GRAPHICS_STEP(i915, G11, STEP_A0, STEP_B0))
-               wa_write_or(wal, RT_CTRL, DIS_NULL_QUERY);
+               wa_mcr_write_or(wal, RT_CTRL, DIS_NULL_QUERY);
 
        /* Wa_22012532006:dg2 */
        if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_A0, STEP_C0) ||
            IS_DG2_GRAPHICS_STEP(engine->i915, G11, STEP_A0, STEP_B0))
-               wa_masked_en(wal, GEN9_HALF_SLICE_CHICKEN7,
-                            DG2_DISABLE_ROUND_ENABLE_ALLOW_FOR_SSLA);
+               wa_mcr_masked_en(wal, GEN9_HALF_SLICE_CHICKEN7,
+                                DG2_DISABLE_ROUND_ENABLE_ALLOW_FOR_SSLA);
 
        if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_A0, STEP_B0)) {
                /* Wa_14010680813:dg2_g10 */
@@ -2216,17 +2353,16 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
        if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_A0, STEP_B0) ||
            IS_DG2_GRAPHICS_STEP(engine->i915, G11, STEP_A0, STEP_B0)) {
                /* Wa_14012362059:dg2 */
-               wa_write_or(wal, GEN12_MERT_MOD_CTRL, FORCE_MISS_FTLB);
+               wa_mcr_write_or(wal, XEHP_MERT_MOD_CTRL, FORCE_MISS_FTLB);
        }
 
        if (IS_DG2_GRAPHICS_STEP(i915, G11, STEP_B0, STEP_FOREVER) ||
            IS_DG2_G10(i915)) {
                /* Wa_22014600077:dg2 */
-               wa_add(wal, GEN10_CACHE_MODE_SS, 0,
-                      _MASKED_BIT_ENABLE(ENABLE_EU_COUNT_FOR_TDL_FLUSH),
-                      0 /* Wa_14012342262 :write-only reg, so skip
-                           verification */,
-                      true);
+               wa_mcr_add(wal, GEN10_CACHE_MODE_SS, 0,
+                          _MASKED_BIT_ENABLE(ENABLE_EU_COUNT_FOR_TDL_FLUSH),
+                          0 /* Wa_14012342262 write-only reg, so skip verification */,
+                          true);
        }
 
        if (IS_DG1_GRAPHICS_STEP(i915, STEP_A0, STEP_B0) ||
@@ -2253,7 +2389,7 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
        if (IS_ALDERLAKE_P(i915) || IS_ALDERLAKE_S(i915) || IS_DG1(i915) ||
            IS_ROCKETLAKE(i915) || IS_TIGERLAKE(i915)) {
                /* Wa_1606931601:tgl,rkl,dg1,adl-s,adl-p */
-               wa_masked_en(wal, GEN7_ROW_CHICKEN2, GEN12_DISABLE_EARLY_READ);
+               wa_mcr_masked_en(wal, GEN8_ROW_CHICKEN2, GEN12_DISABLE_EARLY_READ);
 
                /*
                 * Wa_1407928979:tgl A*
@@ -2282,14 +2418,14 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
            IS_DG1_GRAPHICS_STEP(i915, STEP_A0, STEP_B0) ||
            IS_ROCKETLAKE(i915) || IS_TIGERLAKE(i915)) {
                /* Wa_1409804808:tgl,rkl,dg1[a0],adl-s,adl-p */
-               wa_masked_en(wal, GEN7_ROW_CHICKEN2,
-                            GEN12_PUSH_CONST_DEREF_HOLD_DIS);
+               wa_mcr_masked_en(wal, GEN8_ROW_CHICKEN2,
+                                GEN12_PUSH_CONST_DEREF_HOLD_DIS);
 
                /*
                 * Wa_1409085225:tgl
                 * Wa_14010229206:tgl,rkl,dg1[a0],adl-s,adl-p
                 */
-               wa_masked_en(wal, GEN9_ROW_CHICKEN4, GEN12_DISABLE_TDL_PUSH);
+               wa_mcr_masked_en(wal, GEN9_ROW_CHICKEN4, GEN12_DISABLE_TDL_PUSH);
        }
 
        if (IS_DG1_GRAPHICS_STEP(i915, STEP_A0, STEP_B0) ||
@@ -2313,9 +2449,9 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
        if (IS_DG1(i915) || IS_ROCKETLAKE(i915) || IS_TIGERLAKE(i915) ||
            IS_ALDERLAKE_S(i915) || IS_ALDERLAKE_P(i915)) {
                /* Wa_1406941453:tgl,rkl,dg1,adl-s,adl-p */
-               wa_masked_en(wal,
-                            GEN10_SAMPLER_MODE,
-                            ENABLE_SMALLPL);
+               wa_mcr_masked_en(wal,
+                                GEN10_SAMPLER_MODE,
+                                ENABLE_SMALLPL);
        }
 
        if (GRAPHICS_VER(i915) == 11) {
@@ -2349,9 +2485,9 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
                 * Wa_1405733216:icl
                 * Formerly known as WaDisableCleanEvicts
                 */
-               wa_write_or(wal,
-                           GEN8_L3SQCREG4,
-                           GEN11_LQSC_CLEAN_EVICT_DISABLE);
+               wa_mcr_write_or(wal,
+                               GEN8_L3SQCREG4,
+                               GEN11_LQSC_CLEAN_EVICT_DISABLE);
 
                /* Wa_1606682166:icl */
                wa_write_or(wal,
@@ -2359,10 +2495,10 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
                            GEN7_DISABLE_SAMPLER_PREFETCH);
 
                /* Wa_1409178092:icl */
-               wa_write_clr_set(wal,
-                                GEN11_SCRATCH2,
-                                GEN11_COHERENT_PARTIAL_WRITE_MERGE_ENABLE,
-                                0);
+               wa_mcr_write_clr_set(wal,
+                                    GEN11_SCRATCH2,
+                                    GEN11_COHERENT_PARTIAL_WRITE_MERGE_ENABLE,
+                                    0);
 
                /* WaEnable32PlaneMode:icl */
                wa_masked_en(wal, GEN9_CSFE_CHICKEN1_RCS,
@@ -2389,12 +2525,64 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
                             FF_DOP_CLOCK_GATE_DISABLE);
        }
 
-       if (IS_GRAPHICS_VER(i915, 9, 12)) {
-               /* FtrPerCtxtPreemptionGranularityControl:skl,bxt,kbl,cfl,cnl,icl,tgl */
+       /*
+        * Intel platforms that support fine-grained preemption (i.e., gen9 and
+        * beyond) allow the kernel-mode driver to choose between two different
+        * options for controlling preemption granularity and behavior.
+        *
+        * Option 1 (hardware default):
+        *   Preemption settings are controlled in a global manner via
+        *   kernel-only register CS_DEBUG_MODE1 (0x20EC).  Any granularity
+        *   and settings chosen by the kernel-mode driver will apply to all
+        *   userspace clients.
+        *
+        * Option 2:
+        *   Preemption settings are controlled on a per-context basis via
+        *   register CS_CHICKEN1 (0x2580).  CS_CHICKEN1 is saved/restored on
+        *   context switch and is writable by userspace (e.g., via
+        *   MI_LOAD_REGISTER_IMMEDIATE instructions placed in a batch buffer)
+        *   which allows different userspace drivers/clients to select
+        *   different settings, or to change those settings on the fly in
+        *   response to runtime needs.  This option was known by name
+        *   "FtrPerCtxtPreemptionGranularityControl" at one time, although
+        *   that name is somewhat misleading as other non-granularity
+        *   preemption settings are also impacted by this decision.
+        *
+        * On Linux, our policy has always been to let userspace drivers
+        * control preemption granularity/settings (Option 2).  This was
+        * originally mandatory on gen9 to prevent ABI breakage (old gen9
+        * userspace developed before object-level preemption was enabled would
+        * not behave well if i915 were to go with Option 1 and enable that
+        * preemption in a global manner).  On gen9 each context would have
+        * object-level preemption disabled by default (see
+        * WaDisable3DMidCmdPreemption in gen9_ctx_workarounds_init), but
+        * userspace drivers could opt-in to object-level preemption as they
+        * saw fit.  For post-gen9 platforms, we continue to utilize Option 2;
+        * even though it is no longer necessary for ABI compatibility when
+        * enabling a new platform, it does ensure that userspace will be able
+        * to implement any workarounds that show up requiring temporary
+        * adjustments to preemption behavior at runtime.
+        *
+        * Notes/Workarounds:
+        *  - Wa_14015141709:  On DG2 and early steppings of MTL,
+        *      CS_CHICKEN1[0] does not disable object-level preemption as
+        *      it is supposed to (nor does CS_DEBUG_MODE1[0] if we had been
+        *      using Option 1).  Effectively this means userspace is unable
+        *      to disable object-level preemption on these platforms/steppings
+        *      despite the setting here.
+        *
+        *  - Wa_16013994831:  May require that userspace program
+        *      CS_CHICKEN1[10] when certain runtime conditions are true.
+        *      Userspace requires Option 2 to be in effect for their update of
+        *      CS_CHICKEN1[10] to be effective.
+        *
+        * Other workarounds may appear in the future that will also require
+        * Option 2 behavior to allow proper userspace implementation.
+        */
+       if (GRAPHICS_VER(i915) >= 9)
                wa_masked_en(wal,
                             GEN7_FF_SLICE_CS_CHICKEN1,
                             GEN9_FFSC_PERCTX_PREEMPT_CTRL);
-       }
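An illustrative sketch, not part of this patch: the comment above notes that under Option 2 a userspace driver may program CS_CHICKEN1 (0x2580) itself via MI_LOAD_REGISTER_IMMEDIATE in a batch buffer. Roughly, the emitted dwords would look like the helper below; the function name and the value written are hypothetical, only the 0x2580 offset comes from the comment.

static u32 *emit_cs_chicken1_lri(u32 *cs, u32 masked_value)
{
        *cs++ = MI_LOAD_REGISTER_IMM(1);        /* one (offset, value) pair follows */
        *cs++ = 0x2580;                         /* CS_CHICKEN1, saved/restored with the context */
        *cs++ = masked_value;                   /* masked-bit style write chosen by userspace */

        return cs;
}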
 
        if (IS_SKYLAKE(i915) ||
            IS_KABYLAKE(i915) ||
@@ -2420,36 +2608,36 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
                             GEN9_PREEMPT_GPGPU_SYNC_SWITCH_DISABLE);
 
                /* WaEnableLbsSlaRetryTimerDecrement:skl,bxt,kbl,glk,cfl */
-               wa_write_or(wal,
-                           BDW_SCRATCH1,
-                           GEN9_LBS_SLA_RETRY_TIMER_DECREMENT_ENABLE);
+               wa_mcr_write_or(wal,
+                               BDW_SCRATCH1,
+                               GEN9_LBS_SLA_RETRY_TIMER_DECREMENT_ENABLE);
 
                /* WaProgramL3SqcReg1DefaultForPerf:bxt,glk */
                if (IS_GEN9_LP(i915))
-                       wa_write_clr_set(wal,
-                                        GEN8_L3SQCREG1,
-                                        L3_PRIO_CREDITS_MASK,
-                                        L3_GENERAL_PRIO_CREDITS(62) |
-                                        L3_HIGH_PRIO_CREDITS(2));
+                       wa_mcr_write_clr_set(wal,
+                                            GEN8_L3SQCREG1,
+                                            L3_PRIO_CREDITS_MASK,
+                                            L3_GENERAL_PRIO_CREDITS(62) |
+                                            L3_HIGH_PRIO_CREDITS(2));
 
                /* WaOCLCoherentLineFlush:skl,bxt,kbl,cfl */
-               wa_write_or(wal,
-                           GEN8_L3SQCREG4,
-                           GEN8_LQSC_FLUSH_COHERENT_LINES);
+               wa_mcr_write_or(wal,
+                               GEN8_L3SQCREG4,
+                               GEN8_LQSC_FLUSH_COHERENT_LINES);
 
                /* Disable atomics in L3 to prevent unrecoverable hangs */
                wa_write_clr_set(wal, GEN9_SCRATCH_LNCF1,
                                 GEN9_LNCF_NONIA_COHERENT_ATOMICS_ENABLE, 0);
-               wa_write_clr_set(wal, GEN8_L3SQCREG4,
-                                GEN8_LQSQ_NONIA_COHERENT_ATOMICS_ENABLE, 0);
-               wa_write_clr_set(wal, GEN9_SCRATCH1,
-                                EVICTION_PERF_FIX_ENABLE, 0);
+               wa_mcr_write_clr_set(wal, GEN8_L3SQCREG4,
+                                    GEN8_LQSQ_NONIA_COHERENT_ATOMICS_ENABLE, 0);
+               wa_mcr_write_clr_set(wal, GEN9_SCRATCH1,
+                                    EVICTION_PERF_FIX_ENABLE, 0);
        }
 
        if (IS_HASWELL(i915)) {
                /* WaSampleCChickenBitEnable:hsw */
                wa_masked_en(wal,
-                            HALF_SLICE_CHICKEN3, HSW_SAMPLE_C_PERFORMANCE);
+                            HSW_HALF_SLICE_CHICKEN3, HSW_SAMPLE_C_PERFORMANCE);
 
                wa_masked_dis(wal,
                              CACHE_MODE_0_GEN7,
@@ -2657,7 +2845,7 @@ ccs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
 {
        if (IS_PVC_CT_STEP(engine->i915, STEP_A0, STEP_C0)) {
                /* Wa_14014999345:pvc */
-               wa_masked_en(wal, GEN10_CACHE_MODE_SS, DISABLE_ECC);
+               wa_mcr_masked_en(wal, GEN10_CACHE_MODE_SS, DISABLE_ECC);
        }
 }
 
@@ -2683,8 +2871,8 @@ add_render_compute_tuning_settings(struct drm_i915_private *i915,
        }
 
        if (IS_DG2(i915)) {
-               wa_write_or(wal, XEHP_L3SCQREG7, BLEND_FILL_CACHING_OPT_DIS);
-               wa_write_clr_set(wal, RT_CTRL, STACKID_CTRL, STACKID_CTRL_512);
+               wa_mcr_write_or(wal, XEHP_L3SCQREG7, BLEND_FILL_CACHING_OPT_DIS);
+               wa_mcr_write_clr_set(wal, RT_CTRL, STACKID_CTRL, STACKID_CTRL_512);
 
                /*
                 * This is also listed as Wa_22012654132 for certain DG2
@@ -2695,10 +2883,10 @@ add_render_compute_tuning_settings(struct drm_i915_private *i915,
                 * back for verification on DG2 (due to Wa_14012342262), so
                 * we need to explicitly skip the readback.
                 */
-               wa_add(wal, GEN10_CACHE_MODE_SS, 0,
-                      _MASKED_BIT_ENABLE(ENABLE_PREFETCH_INTO_IC),
-                      0 /* write-only, so skip validation */,
-                      true);
+               wa_mcr_add(wal, GEN10_CACHE_MODE_SS, 0,
+                          _MASKED_BIT_ENABLE(ENABLE_PREFETCH_INTO_IC),
+                          0 /* write-only, so skip validation */,
+                          true);
        }
 
        /*
@@ -2707,8 +2895,8 @@ add_render_compute_tuning_settings(struct drm_i915_private *i915,
         * platforms.
         */
        if (INTEL_INFO(i915)->tuning_thread_rr_after_dep)
-               wa_masked_field_set(wal, GEN9_ROW_CHICKEN4, THREAD_EX_ARB_MODE,
-                                   THREAD_EX_ARB_MODE_RR_AFTER_DEP);
+               wa_mcr_masked_field_set(wal, GEN9_ROW_CHICKEN4, THREAD_EX_ARB_MODE,
+                                       THREAD_EX_ARB_MODE_RR_AFTER_DEP);
 }
 
 /*
@@ -2734,30 +2922,30 @@ general_render_compute_wa_init(struct intel_engine_cs *engine, struct i915_wa_li
 
        if (IS_XEHPSDV(i915)) {
                /* Wa_1409954639 */
-               wa_masked_en(wal,
-                            GEN8_ROW_CHICKEN,
-                            SYSTOLIC_DOP_CLOCK_GATING_DIS);
+               wa_mcr_masked_en(wal,
+                                GEN8_ROW_CHICKEN,
+                                SYSTOLIC_DOP_CLOCK_GATING_DIS);
 
                /* Wa_1607196519 */
-               wa_masked_en(wal,
-                            GEN9_ROW_CHICKEN4,
-                            GEN12_DISABLE_GRF_CLEAR);
+               wa_mcr_masked_en(wal,
+                                GEN9_ROW_CHICKEN4,
+                                GEN12_DISABLE_GRF_CLEAR);
 
                /* Wa_14010670810:xehpsdv */
-               wa_write_or(wal, XEHP_L3NODEARBCFG, XEHP_LNESPARE);
+               wa_mcr_write_or(wal, XEHP_L3NODEARBCFG, XEHP_LNESPARE);
 
                /* Wa_14010449647:xehpsdv */
-               wa_masked_en(wal, GEN7_HALF_SLICE_CHICKEN1,
-                            GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE);
+               wa_mcr_masked_en(wal, GEN8_HALF_SLICE_CHICKEN1,
+                                GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE);
 
                /* Wa_18011725039:xehpsdv */
                if (IS_XEHPSDV_GRAPHICS_STEP(i915, STEP_A1, STEP_B0)) {
-                       wa_masked_dis(wal, MLTICTXCTL, TDONRENDER);
-                       wa_write_or(wal, L3SQCREG1_CCS0, FLUSHALLNONCOH);
+                       wa_mcr_masked_dis(wal, MLTICTXCTL, TDONRENDER);
+                       wa_mcr_write_or(wal, L3SQCREG1_CCS0, FLUSHALLNONCOH);
                }
 
                /* Wa_14012362059:xehpsdv */
-               wa_write_or(wal, GEN12_MERT_MOD_CTRL, FORCE_MISS_FTLB);
+               wa_mcr_write_or(wal, XEHP_MERT_MOD_CTRL, FORCE_MISS_FTLB);
 
                /* Wa_14014368820:xehpsdv */
                wa_write_or(wal, GEN12_GAMCNTRL_CTRL, INVALIDATION_BROADCAST_MODE_DIS |
@@ -2766,19 +2954,30 @@ general_render_compute_wa_init(struct intel_engine_cs *engine, struct i915_wa_li
 
        if (IS_DG2(i915) || IS_PONTEVECCHIO(i915)) {
                /* Wa_14015227452:dg2,pvc */
-               wa_masked_en(wal, GEN9_ROW_CHICKEN4, XEHP_DIS_BBL_SYSPIPE);
+               wa_mcr_masked_en(wal, GEN9_ROW_CHICKEN4, XEHP_DIS_BBL_SYSPIPE);
 
                /* Wa_22014226127:dg2,pvc */
-               wa_write_or(wal, LSC_CHICKEN_BIT_0, DISABLE_D8_D16_COASLESCE);
+               wa_mcr_write_or(wal, LSC_CHICKEN_BIT_0, DISABLE_D8_D16_COASLESCE);
 
                /* Wa_16015675438:dg2,pvc */
                wa_masked_en(wal, FF_SLICE_CS_CHICKEN2, GEN12_PERF_FIX_BALANCING_CFE_DISABLE);
 
                /* Wa_18018781329:dg2,pvc */
-               wa_write_or(wal, RENDER_MOD_CTRL, FORCE_MISS_FTLB);
-               wa_write_or(wal, COMP_MOD_CTRL, FORCE_MISS_FTLB);
-               wa_write_or(wal, VDBX_MOD_CTRL, FORCE_MISS_FTLB);
-               wa_write_or(wal, VEBX_MOD_CTRL, FORCE_MISS_FTLB);
+               wa_mcr_write_or(wal, RENDER_MOD_CTRL, FORCE_MISS_FTLB);
+               wa_mcr_write_or(wal, COMP_MOD_CTRL, FORCE_MISS_FTLB);
+               wa_mcr_write_or(wal, VDBX_MOD_CTRL, FORCE_MISS_FTLB);
+               wa_mcr_write_or(wal, VEBX_MOD_CTRL, FORCE_MISS_FTLB);
+       }
+
+       if (IS_DG2(i915)) {
+               /*
+                * Wa_16011620976:dg2_g11
+                * Wa_22015475538:dg2
+                */
+               wa_mcr_write_or(wal, LSC_CHICKEN_BIT_0_UDW, DIS_CHAIN_2XSIMD8);
+
+               /* Wa_18017747507:dg2 */
+               wa_masked_en(wal, VFG_PREEMPTION_CHICKEN, POLYGON_TRIFAN_LINELOOP_DISABLE);
        }
 }
 
index 8a4b6de4e754854166167efbfff54b8cb03fa675..7c8b01d00043087cfb1020cc0a1685cbbed8bd90 100644 (file)
 #include "i915_reg_defs.h"
 
 struct i915_wa {
-       i915_reg_t      reg;
+       union {
+               i915_reg_t      reg;
+               i915_mcr_reg_t  mcr_reg;
+       };
        u32             clr;
        u32             set;
        u32             read;
-       bool            masked_reg;
+
+       u32             masked_reg:1;
+       u32             is_mcr:1;
 };
 
 struct i915_wa_list {
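A hedged sketch, not taken from the patch: given the union and is_mcr flag added to struct i915_wa above, the wa_mcr_*() helpers used throughout this series presumably record a multicast register along these lines. The struct fields are the ones shown above; _wa_add() and the exact argument list are assumptions.

static void wa_mcr_add_sketch(struct i915_wa_list *wal, i915_mcr_reg_t reg,
                              u32 clear, u32 set, u32 read_mask, bool masked)
{
        struct i915_wa wa = {
                .mcr_reg = reg,         /* stored via the new union member */
                .clr = clear,
                .set = set,
                .read = read_mask,
                .masked_reg = masked,
                .is_mcr = 1,            /* lets the apply/verify paths steer the access */
        };

        _wa_add(wal, &wa);              /* hypothetical append helper */
}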
index 1b75f478d1b83421924e9fa7b0b6afc1f16cde2d..881b64f3e7b9920cfc1c10d7debfd6e736f0e3a9 100644 (file)
@@ -39,6 +39,16 @@ static int perf_end(struct intel_gt *gt)
        return igt_flush_test(gt->i915);
 }
 
+static i915_reg_t timestamp_reg(struct intel_engine_cs *engine)
+{
+       struct drm_i915_private *i915 = engine->i915;
+
+       if (GRAPHICS_VER(i915) == 5 || IS_G4X(i915))
+               return RING_TIMESTAMP_UDW(engine->mmio_base);
+       else
+               return RING_TIMESTAMP(engine->mmio_base);
+}
+
 static int write_timestamp(struct i915_request *rq, int slot)
 {
        struct intel_timeline *tl =
@@ -55,7 +65,7 @@ static int write_timestamp(struct i915_request *rq, int slot)
        if (GRAPHICS_VER(rq->engine->i915) >= 8)
                cmd++;
        *cs++ = cmd;
-       *cs++ = i915_mmio_reg_offset(RING_TIMESTAMP(rq->engine->mmio_base));
+       *cs++ = i915_mmio_reg_offset(timestamp_reg(rq->engine));
        *cs++ = tl->hwsp_offset + slot * sizeof(u32);
        *cs++ = 0;
 
@@ -125,7 +135,7 @@ static int perf_mi_bb_start(void *arg)
        enum intel_engine_id id;
        int err = 0;
 
-       if (GRAPHICS_VER(gt->i915) < 7) /* for per-engine CS_TIMESTAMP */
+       if (GRAPHICS_VER(gt->i915) < 4) /* Any CS_TIMESTAMP? */
                return 0;
 
        perf_begin(gt);
@@ -135,6 +145,9 @@ static int perf_mi_bb_start(void *arg)
                u32 cycles[COUNT];
                int i;
 
+               if (GRAPHICS_VER(engine->i915) < 7 && engine->id != RCS0)
+                       continue;
+
                intel_engine_pm_get(engine);
 
                batch = create_empty_batch(ce);
@@ -249,7 +262,7 @@ static int perf_mi_noop(void *arg)
        enum intel_engine_id id;
        int err = 0;
 
-       if (GRAPHICS_VER(gt->i915) < 7) /* for per-engine CS_TIMESTAMP */
+       if (GRAPHICS_VER(gt->i915) < 4) /* Any CS_TIMESTAMP? */
                return 0;
 
        perf_begin(gt);
@@ -259,6 +272,9 @@ static int perf_mi_noop(void *arg)
                u32 cycles[COUNT];
                int i;
 
+               if (GRAPHICS_VER(engine->i915) < 7 && engine->id != RCS0)
+                       continue;
+
                intel_engine_pm_get(engine);
 
                base = create_empty_batch(ce);
index 1e08b2473b99313a391b2f40ef466bc6bdf0c274..2c7c053a88081ff8ba9b561894b37fd721ccfcc8 100644 (file)
@@ -85,8 +85,6 @@ static int wait_for_reset(struct intel_engine_cs *engine,
                        break;
        } while (time_before(jiffies, timeout));
 
-       flush_scheduled_work();
-
        if (rq->fence.error != -EIO) {
                pr_err("%s: hanging request %llx:%lld not reset\n",
                       engine->name,
@@ -3475,12 +3473,14 @@ static int random_priority(struct rnd_state *rnd)
 
 struct preempt_smoke {
        struct intel_gt *gt;
+       struct kthread_work work;
        struct i915_gem_context **contexts;
        struct intel_engine_cs *engine;
        struct drm_i915_gem_object *batch;
        unsigned int ncontext;
        struct rnd_state prng;
        unsigned long count;
+       int result;
 };
 
 static struct i915_gem_context *smoke_context(struct preempt_smoke *smoke)
@@ -3540,34 +3540,31 @@ unpin:
        return err;
 }
 
-static int smoke_crescendo_thread(void *arg)
+static void smoke_crescendo_work(struct kthread_work *work)
 {
-       struct preempt_smoke *smoke = arg;
+       struct preempt_smoke *smoke = container_of(work, typeof(*smoke), work);
        IGT_TIMEOUT(end_time);
        unsigned long count;
 
        count = 0;
        do {
                struct i915_gem_context *ctx = smoke_context(smoke);
-               int err;
 
-               err = smoke_submit(smoke,
-                                  ctx, count % I915_PRIORITY_MAX,
-                                  smoke->batch);
-               if (err)
-                       return err;
+               smoke->result = smoke_submit(smoke, ctx,
+                                            count % I915_PRIORITY_MAX,
+                                            smoke->batch);
 
                count++;
-       } while (count < smoke->ncontext && !__igt_timeout(end_time, NULL));
+       } while (!smoke->result && count < smoke->ncontext &&
+                !__igt_timeout(end_time, NULL));
 
        smoke->count = count;
-       return 0;
 }
 
 static int smoke_crescendo(struct preempt_smoke *smoke, unsigned int flags)
 #define BATCH BIT(0)
 {
-       struct task_struct *tsk[I915_NUM_ENGINES] = {};
+       struct kthread_worker *worker[I915_NUM_ENGINES] = {};
        struct preempt_smoke *arg;
        struct intel_engine_cs *engine;
        enum intel_engine_id id;
@@ -3578,6 +3575,8 @@ static int smoke_crescendo(struct preempt_smoke *smoke, unsigned int flags)
        if (!arg)
                return -ENOMEM;
 
+       memset(arg, 0, I915_NUM_ENGINES * sizeof(*arg));
+
        for_each_engine(engine, smoke->gt, id) {
                arg[id] = *smoke;
                arg[id].engine = engine;
@@ -3585,31 +3584,28 @@ static int smoke_crescendo(struct preempt_smoke *smoke, unsigned int flags)
                        arg[id].batch = NULL;
                arg[id].count = 0;
 
-               tsk[id] = kthread_run(smoke_crescendo_thread, arg,
-                                     "igt/smoke:%d", id);
-               if (IS_ERR(tsk[id])) {
-                       err = PTR_ERR(tsk[id]);
+               worker[id] = kthread_create_worker(0, "igt/smoke:%d", id);
+               if (IS_ERR(worker[id])) {
+                       err = PTR_ERR(worker[id]);
                        break;
                }
-               get_task_struct(tsk[id]);
-       }
 
-       yield(); /* start all threads before we kthread_stop() */
+               kthread_init_work(&arg[id].work, smoke_crescendo_work);
+               kthread_queue_work(worker[id], &arg[id].work);
+       }
 
        count = 0;
        for_each_engine(engine, smoke->gt, id) {
-               int status;
-
-               if (IS_ERR_OR_NULL(tsk[id]))
+               if (IS_ERR_OR_NULL(worker[id]))
                        continue;
 
-               status = kthread_stop(tsk[id]);
-               if (status && !err)
-                       err = status;
+               kthread_flush_work(&arg[id].work);
+               if (arg[id].result && !err)
+                       err = arg[id].result;
 
                count += arg[id].count;
 
-               put_task_struct(tsk[id]);
+               kthread_destroy_worker(worker[id]);
        }
 
        pr_info("Submitted %lu crescendo:%x requests across %d engines and %d contexts\n",
index be94f863bdefff29c44aac0d3ccd29b73954e422..b46425aeb2f04876dea28a39b5bda4433abb134e 100644 (file)
@@ -36,6 +36,19 @@ static int cmp_u32(const void *A, const void *B)
                return 0;
 }
 
+static u32 read_timestamp(struct intel_engine_cs *engine)
+{
+       struct drm_i915_private *i915 = engine->i915;
+
+       /* On i965 the first read tends to give a stale value */
+       ENGINE_READ_FW(engine, RING_TIMESTAMP);
+
+       if (GRAPHICS_VER(i915) == 5 || IS_G4X(i915))
+               return ENGINE_READ_FW(engine, RING_TIMESTAMP_UDW);
+       else
+               return ENGINE_READ_FW(engine, RING_TIMESTAMP);
+}
+
 static void measure_clocks(struct intel_engine_cs *engine,
                           u32 *out_cycles, ktime_t *out_dt)
 {
@@ -45,13 +58,13 @@ static void measure_clocks(struct intel_engine_cs *engine,
 
        for (i = 0; i < 5; i++) {
                local_irq_disable();
-               cycles[i] = -ENGINE_READ_FW(engine, RING_TIMESTAMP);
+               cycles[i] = -read_timestamp(engine);
                dt[i] = ktime_get();
 
                udelay(1000);
 
                dt[i] = ktime_sub(ktime_get(), dt[i]);
-               cycles[i] += ENGINE_READ_FW(engine, RING_TIMESTAMP);
+               cycles[i] += read_timestamp(engine);
                local_irq_enable();
        }
 
@@ -78,25 +91,6 @@ static int live_gt_clocks(void *arg)
        if (GRAPHICS_VER(gt->i915) < 4) /* Any CS_TIMESTAMP? */
                return 0;
 
-       if (GRAPHICS_VER(gt->i915) == 5)
-               /*
-                * XXX CS_TIMESTAMP low dword is dysfunctional?
-                *
-                * Ville's experiments indicate the high dword still works,
-                * but at a correspondingly reduced frequency.
-                */
-               return 0;
-
-       if (GRAPHICS_VER(gt->i915) == 4)
-               /*
-                * XXX CS_TIMESTAMP appears gibberish
-                *
-                * Ville's experiments indicate that it mostly appears 'stuck'
-                * in that we see the register report the same cycle count
-                * for a couple of reads.
-                */
-               return 0;
-
        intel_gt_pm_get(gt);
        intel_uncore_forcewake_get(gt->uncore, FORCEWAKE_ALL);
 
index 7f3bb1d34dfbf63b906375c087a276dd821ea7b0..71263058a7b0515cef0023ceec1e076d88dd357c 100644 (file)
@@ -866,10 +866,13 @@ static int igt_reset_active_engine(void *arg)
 }
 
 struct active_engine {
-       struct task_struct *task;
+       struct kthread_worker *worker;
+       struct kthread_work work;
        struct intel_engine_cs *engine;
        unsigned long resets;
        unsigned int flags;
+       bool stop;
+       int result;
 };
 
 #define TEST_ACTIVE    BIT(0)
@@ -900,10 +903,10 @@ static int active_request_put(struct i915_request *rq)
        return err;
 }
 
-static int active_engine(void *data)
+static void active_engine(struct kthread_work *work)
 {
        I915_RND_STATE(prng);
-       struct active_engine *arg = data;
+       struct active_engine *arg = container_of(work, typeof(*arg), work);
        struct intel_engine_cs *engine = arg->engine;
        struct i915_request *rq[8] = {};
        struct intel_context *ce[ARRAY_SIZE(rq)];
@@ -913,16 +916,17 @@ static int active_engine(void *data)
        for (count = 0; count < ARRAY_SIZE(ce); count++) {
                ce[count] = intel_context_create(engine);
                if (IS_ERR(ce[count])) {
-                       err = PTR_ERR(ce[count]);
-                       pr_err("[%s] Create context #%ld failed: %d!\n", engine->name, count, err);
+                       arg->result = PTR_ERR(ce[count]);
+                       pr_err("[%s] Create context #%ld failed: %d!\n",
+                              engine->name, count, arg->result);
                        while (--count)
                                intel_context_put(ce[count]);
-                       return err;
+                       return;
                }
        }
 
        count = 0;
-       while (!kthread_should_stop()) {
+       while (!READ_ONCE(arg->stop)) {
                unsigned int idx = count++ & (ARRAY_SIZE(rq) - 1);
                struct i915_request *old = rq[idx];
                struct i915_request *new;
@@ -967,7 +971,7 @@ static int active_engine(void *data)
                intel_context_put(ce[count]);
        }
 
-       return err;
+       arg->result = err;
 }
 
 static int __igt_reset_engines(struct intel_gt *gt,
@@ -1022,7 +1026,7 @@ static int __igt_reset_engines(struct intel_gt *gt,
 
                memset(threads, 0, sizeof(*threads) * I915_NUM_ENGINES);
                for_each_engine(other, gt, tmp) {
-                       struct task_struct *tsk;
+                       struct kthread_worker *worker;
 
                        threads[tmp].resets =
                                i915_reset_engine_count(global, other);
@@ -1036,19 +1040,21 @@ static int __igt_reset_engines(struct intel_gt *gt,
                        threads[tmp].engine = other;
                        threads[tmp].flags = flags;
 
-                       tsk = kthread_run(active_engine, &threads[tmp],
-                                         "igt/%s", other->name);
-                       if (IS_ERR(tsk)) {
-                               err = PTR_ERR(tsk);
-                               pr_err("[%s] Thread spawn failed: %d!\n", engine->name, err);
+                       worker = kthread_create_worker(0, "igt/%s",
+                                                      other->name);
+                       if (IS_ERR(worker)) {
+                               err = PTR_ERR(worker);
+                               pr_err("[%s] Worker create failed: %d!\n",
+                                      engine->name, err);
                                goto unwind;
                        }
 
-                       threads[tmp].task = tsk;
-                       get_task_struct(tsk);
-               }
+                       threads[tmp].worker = worker;
 
-               yield(); /* start all threads before we begin */
+                       kthread_init_work(&threads[tmp].work, active_engine);
+                       kthread_queue_work(threads[tmp].worker,
+                                          &threads[tmp].work);
+               }
 
                st_engine_heartbeat_disable_no_pm(engine);
                GEM_BUG_ON(test_and_set_bit(I915_RESET_ENGINE + id,
@@ -1197,17 +1203,20 @@ unwind:
                for_each_engine(other, gt, tmp) {
                        int ret;
 
-                       if (!threads[tmp].task)
+                       if (!threads[tmp].worker)
                                continue;
 
-                       ret = kthread_stop(threads[tmp].task);
+                       WRITE_ONCE(threads[tmp].stop, true);
+                       kthread_flush_work(&threads[tmp].work);
+                       ret = READ_ONCE(threads[tmp].result);
                        if (ret) {
                                pr_err("kthread for other engine %s failed, err=%d\n",
                                       other->name, ret);
                                if (!err)
                                        err = ret;
                        }
-                       put_task_struct(threads[tmp].task);
+
+                       kthread_destroy_worker(threads[tmp].worker);
 
                        /* GuC based resets are not logged per engine */
                        if (!using_guc) {
index 2b0c8799994900312a744ef94f4ed1f642a1f962..0dc5309c90a4f5c37f175e2e68f580e8a47d2191 100644 (file)
@@ -6,6 +6,7 @@
 #include <linux/sort.h>
 
 #include "gem/i915_gem_internal.h"
+#include "gem/i915_gem_lmem.h"
 
 #include "selftests/i915_random.h"
 
index cfb4708dd62e6451aa811cdd721822e4417bc8b1..99a372486fb7f6e9de643f3949fca6f327fe3a32 100644 (file)
@@ -1107,21 +1107,27 @@ static u64 __measure_power(int duration_ms)
        return div64_u64(1000 * 1000 * dE, dt);
 }
 
-static u64 measure_power_at(struct intel_rps *rps, int *freq)
+static u64 measure_power(struct intel_rps *rps, int *freq)
 {
        u64 x[5];
        int i;
 
-       *freq = rps_set_check(rps, *freq);
        for (i = 0; i < 5; i++)
                x[i] = __measure_power(5);
-       *freq = (*freq + read_cagf(rps)) / 2;
+
+       *freq = (*freq + intel_rps_read_actual_frequency(rps)) / 2;
 
        /* A simple triangle filter for better result stability */
        sort(x, 5, sizeof(*x), cmp_u64, NULL);
        return div_u64(x[1] + 2 * x[2] + x[3], 4);
 }
 
+static u64 measure_power_at(struct intel_rps *rps, int *freq)
+{
+       *freq = rps_set_check(rps, *freq);
+       return measure_power(rps, freq);
+}
+
 int live_rps_power(void *arg)
 {
        struct intel_gt *gt = arg;
index f8a1d27df272f01c00ea4cba86d8fdf5f561fb87..82ec95a299f6a4245cb66ffaad74f909390da2ab 100644 (file)
@@ -11,7 +11,8 @@
 enum test_type {
        VARY_MIN,
        VARY_MAX,
-       MAX_GRANTED
+       MAX_GRANTED,
+       SLPC_POWER,
 };
 
 static int slpc_set_min_freq(struct intel_guc_slpc *slpc, u32 freq)
@@ -41,6 +42,39 @@ static int slpc_set_max_freq(struct intel_guc_slpc *slpc, u32 freq)
        return ret;
 }
 
+static int slpc_set_freq(struct intel_gt *gt, u32 freq)
+{
+       int err;
+       struct intel_guc_slpc *slpc = &gt->uc.guc.slpc;
+
+       err = slpc_set_max_freq(slpc, freq);
+       if (err) {
+               pr_err("Unable to update max freq");
+               return err;
+       }
+
+       err = slpc_set_min_freq(slpc, freq);
+       if (err) {
+               pr_err("Unable to update min freq");
+               return err;
+       }
+
+       return err;
+}
+
+static u64 measure_power_at_freq(struct intel_gt *gt, int *freq, u64 *power)
+{
+       int err = 0;
+
+       err = slpc_set_freq(gt, *freq);
+       if (err)
+               return err;
+       *freq = intel_rps_read_actual_frequency(&gt->rps);
+       *power = measure_power(&gt->rps, freq);
+
+       return err;
+}
+
 static int vary_max_freq(struct intel_guc_slpc *slpc, struct intel_rps *rps,
                         u32 *max_act_freq)
 {
@@ -113,6 +147,58 @@ static int vary_min_freq(struct intel_guc_slpc *slpc, struct intel_rps *rps,
        return err;
 }
 
+static int slpc_power(struct intel_gt *gt, struct intel_engine_cs *engine)
+{
+       struct intel_guc_slpc *slpc = &gt->uc.guc.slpc;
+       struct {
+               u64 power;
+               int freq;
+       } min, max;
+       int err = 0;
+
+       /*
+        * Our fundamental assumption is that running at lower frequency
+        * actually saves power. Let's see if our RAPL measurement supports
+        * that theory.
+        */
+       if (!librapl_supported(gt->i915))
+               return 0;
+
+       min.freq = slpc->min_freq;
+       err = measure_power_at_freq(gt, &min.freq, &min.power);
+
+       if (err)
+               return err;
+
+       max.freq = slpc->rp0_freq;
+       err = measure_power_at_freq(gt, &max.freq, &max.power);
+
+       if (err)
+               return err;
+
+       pr_info("%s: min:%llumW @ %uMHz, max:%llumW @ %uMHz\n",
+               engine->name,
+               min.power, min.freq,
+               max.power, max.freq);
+
+       if (10 * min.freq >= 9 * max.freq) {
+                       pr_notice("Could not control frequency, ran at [%uMHz, %uMHz]\n",
+                         min.freq, max.freq);
+       }
+
+       if (11 * min.power > 10 * max.power) {
+               pr_err("%s: did not conserve power when setting lower frequency!\n",
+                      engine->name);
+               err = -EINVAL;
+       }
+
+       /* Restore min/max frequencies */
+       slpc_set_max_freq(slpc, slpc->rp0_freq);
+       slpc_set_min_freq(slpc, slpc->min_freq);
+
+       return err;
+}
+
 static int max_granted_freq(struct intel_guc_slpc *slpc, struct intel_rps *rps, u32 *max_act_freq)
 {
        struct intel_gt *gt = rps_to_gt(rps);
@@ -153,6 +239,11 @@ static int run_test(struct intel_gt *gt, int test_type)
        if (!intel_uc_uses_guc_slpc(&gt->uc))
                return 0;
 
+       if (slpc->min_freq == slpc->rp0_freq) {
+               pr_err("Min/Max are fused to the same value\n");
+               return -EINVAL;
+       }
+
        if (igt_spinner_init(&spin, gt))
                return -ENOMEM;
 
@@ -167,17 +258,14 @@ static int run_test(struct intel_gt *gt, int test_type)
        }
 
        /*
-        * FIXME: With efficient frequency enabled, GuC can request
-        * frequencies higher than the SLPC max. While this is fixed
-        * in GuC, we level set these tests with RPn as min.
+        * Set min frequency to RPn so that we can test the whole
+        * range of RPn-RP0. This also turns off efficient freq
+        * usage and makes results more predictable.
         */
        err = slpc_set_min_freq(slpc, slpc->min_freq);
-       if (err)
+       if (err) {
+               pr_err("Unable to update min freq!");
                return err;
-
-       if (slpc->min_freq == slpc->rp0_freq) {
-               pr_err("Min/Max are fused to the same value\n");
-               return -EINVAL;
        }
 
        intel_gt_pm_wait_for_idle(gt);
@@ -233,17 +321,23 @@ static int run_test(struct intel_gt *gt, int test_type)
 
                        err = max_granted_freq(slpc, rps, &max_act_freq);
                        break;
+
+               case SLPC_POWER:
+                       err = slpc_power(gt, engine);
+                       break;
                }
 
-               pr_info("Max actual frequency for %s was %d\n",
-                       engine->name, max_act_freq);
+               if (test_type != SLPC_POWER) {
+                       pr_info("Max actual frequency for %s was %d\n",
+                               engine->name, max_act_freq);
 
-               /* Actual frequency should rise above min */
-               if (max_act_freq <= slpc_min_freq) {
-                       pr_err("Actual freq did not rise above min\n");
-                       pr_err("Perf Limit Reasons: 0x%x\n",
-                              intel_uncore_read(gt->uncore, GT0_PERF_LIMIT_REASONS));
-                       err = -EINVAL;
+                       /* Actual frequency should rise above min */
+                       if (max_act_freq <= slpc->min_freq) {
+                               pr_err("Actual freq did not rise above min\n");
+                               pr_err("Perf Limit Reasons: 0x%x\n",
+                                      intel_uncore_read(gt->uncore, GT0_PERF_LIMIT_REASONS));
+                               err = -EINVAL;
+                       }
                }
 
                igt_spinner_end(&spin);
@@ -270,26 +364,66 @@ static int run_test(struct intel_gt *gt, int test_type)
 static int live_slpc_vary_min(void *arg)
 {
        struct drm_i915_private *i915 = arg;
-       struct intel_gt *gt = to_gt(i915);
+       struct intel_gt *gt;
+       unsigned int i;
+       int ret;
+
+       for_each_gt(gt, i915, i) {
+               ret = run_test(gt, VARY_MIN);
+               if (ret)
+                       return ret;
+       }
 
-       return run_test(gt, VARY_MIN);
+       return ret;
 }
 
 static int live_slpc_vary_max(void *arg)
 {
        struct drm_i915_private *i915 = arg;
-       struct intel_gt *gt = to_gt(i915);
+       struct intel_gt *gt;
+       unsigned int i;
+       int ret;
+
+       for_each_gt(gt, i915, i) {
+               ret = run_test(gt, VARY_MAX);
+               if (ret)
+                       return ret;
+       }
 
-       return run_test(gt, VARY_MAX);
+       return ret;
 }
 
 /* check if pcode can grant RP0 */
 static int live_slpc_max_granted(void *arg)
 {
        struct drm_i915_private *i915 = arg;
-       struct intel_gt *gt = to_gt(i915);
+       struct intel_gt *gt;
+       unsigned int i;
+       int ret;
 
-       return run_test(gt, MAX_GRANTED);
+       for_each_gt(gt, i915, i) {
+               ret = run_test(gt, MAX_GRANTED);
+               if (ret)
+                       return ret;
+       }
+
+       return ret;
+}
+
+static int live_slpc_power(void *arg)
+{
+       struct drm_i915_private *i915 = arg;
+       struct intel_gt *gt;
+       unsigned int i;
+       int ret;
+
+       for_each_gt(gt, i915, i) {
+               ret = run_test(gt, SLPC_POWER);
+               if (ret)
+                       return ret;
+       }
+
+       return ret;
 }
 
 int intel_slpc_live_selftests(struct drm_i915_private *i915)
@@ -298,10 +432,16 @@ int intel_slpc_live_selftests(struct drm_i915_private *i915)
                SUBTEST(live_slpc_vary_max),
                SUBTEST(live_slpc_vary_min),
                SUBTEST(live_slpc_max_granted),
+               SUBTEST(live_slpc_power),
        };
 
-       if (intel_gt_is_wedged(to_gt(i915)))
-               return 0;
+       struct intel_gt *gt;
+       unsigned int i;
+
+       for_each_gt(gt, i915, i) {
+               if (intel_gt_is_wedged(gt))
+                       return 0;
+       }
 
        return i915_live_subtests(tests, i915);
 }
index 67a9aab801ddf1ac5e2cfca3281808be2dbb7fa5..21b1edc052f888e9974bbfbbf81c44e409e57854 100644 (file)
@@ -991,7 +991,7 @@ static bool pardon_reg(struct drm_i915_private *i915, i915_reg_t reg)
        /* Alas, we must pardon some whitelists. Mistakes already made */
        static const struct regmask pardon[] = {
                { GEN9_CTX_PREEMPT_REG, 9 },
-               { GEN8_L3SQCREG4, 9 },
+               { _MMIO(0xb118), 9 }, /* GEN8_L3SQCREG4 */
        };
 
        return find_reg(i915, reg, pardon, ARRAY_SIZE(pardon));
index 9670310562029c005480528a1b996e39413ef6f6..f2d9858d827c23bd3a5bb16a4feaf8d1c9c7b4b8 100644 (file)
@@ -144,7 +144,7 @@ max_spin_store(struct kobject *kobj, struct kobj_attribute *attr,
               const char *buf, size_t count)
 {
        struct intel_engine_cs *engine = kobj_to_engine(kobj);
-       unsigned long long duration;
+       unsigned long long duration, clamped;
        int err;
 
        /*
@@ -168,7 +168,8 @@ max_spin_store(struct kobject *kobj, struct kobj_attribute *attr,
        if (err)
                return err;
 
-       if (duration > jiffies_to_nsecs(2))
+       clamped = intel_clamp_max_busywait_duration_ns(engine, duration);
+       if (duration != clamped)
                return -EINVAL;
 
        WRITE_ONCE(engine->props.max_busywait_duration_ns, duration);
@@ -203,7 +204,7 @@ timeslice_store(struct kobject *kobj, struct kobj_attribute *attr,
                const char *buf, size_t count)
 {
        struct intel_engine_cs *engine = kobj_to_engine(kobj);
-       unsigned long long duration;
+       unsigned long long duration, clamped;
        int err;
 
        /*
@@ -218,7 +219,8 @@ timeslice_store(struct kobject *kobj, struct kobj_attribute *attr,
        if (err)
                return err;
 
-       if (duration > jiffies_to_msecs(MAX_SCHEDULE_TIMEOUT))
+       clamped = intel_clamp_timeslice_duration_ms(engine, duration);
+       if (duration != clamped)
                return -EINVAL;
 
        WRITE_ONCE(engine->props.timeslice_duration_ms, duration);
@@ -256,7 +258,7 @@ stop_store(struct kobject *kobj, struct kobj_attribute *attr,
           const char *buf, size_t count)
 {
        struct intel_engine_cs *engine = kobj_to_engine(kobj);
-       unsigned long long duration;
+       unsigned long long duration, clamped;
        int err;
 
        /*
@@ -272,7 +274,8 @@ stop_store(struct kobject *kobj, struct kobj_attribute *attr,
        if (err)
                return err;
 
-       if (duration > jiffies_to_msecs(MAX_SCHEDULE_TIMEOUT))
+       clamped = intel_clamp_stop_timeout_ms(engine, duration);
+       if (duration != clamped)
                return -EINVAL;
 
        WRITE_ONCE(engine->props.stop_timeout_ms, duration);
@@ -306,7 +309,7 @@ preempt_timeout_store(struct kobject *kobj, struct kobj_attribute *attr,
                      const char *buf, size_t count)
 {
        struct intel_engine_cs *engine = kobj_to_engine(kobj);
-       unsigned long long timeout;
+       unsigned long long timeout, clamped;
        int err;
 
        /*
@@ -322,7 +325,8 @@ preempt_timeout_store(struct kobject *kobj, struct kobj_attribute *attr,
        if (err)
                return err;
 
-       if (timeout > jiffies_to_msecs(MAX_SCHEDULE_TIMEOUT))
+       clamped = intel_clamp_preempt_timeout_ms(engine, timeout);
+       if (timeout != clamped)
                return -EINVAL;
 
        WRITE_ONCE(engine->props.preempt_timeout_ms, timeout);
@@ -362,7 +366,7 @@ heartbeat_store(struct kobject *kobj, struct kobj_attribute *attr,
                const char *buf, size_t count)
 {
        struct intel_engine_cs *engine = kobj_to_engine(kobj);
-       unsigned long long delay;
+       unsigned long long delay, clamped;
        int err;
 
        /*
@@ -379,7 +383,8 @@ heartbeat_store(struct kobject *kobj, struct kobj_attribute *attr,
        if (err)
                return err;
 
-       if (delay >= jiffies_to_msecs(MAX_SCHEDULE_TIMEOUT))
+       clamped = intel_clamp_heartbeat_interval_ms(engine, delay);
+       if (delay != clamped)
                return -EINVAL;
 
        err = intel_engine_set_heartbeat(engine, delay);
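A hedged aside, not part of this hunk: the intel_clamp_*() helpers the sysfs stores now rely on are not shown here, but judging from the bounds checks they replace, each presumably clamps the request to what the scheduler backend can honour, roughly:

static u64 sketch_clamp_timeslice_duration_ms(struct intel_engine_cs *engine,
                                              u64 duration)
{
        /* the old check simply rejected anything above this bound */
        return min_t(u64, duration, jiffies_to_msecs(MAX_SCHEDULE_TIMEOUT));
}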
index 29ef8afc8c2e45699f88fd064a40051c329e5ce2..f359bef046e0b273a850cc0698dea8d1eb39abf7 100644 (file)
@@ -117,6 +117,7 @@ enum intel_guc_action {
        INTEL_GUC_ACTION_ENTER_S_STATE = 0x501,
        INTEL_GUC_ACTION_EXIT_S_STATE = 0x502,
        INTEL_GUC_ACTION_GLOBAL_SCHED_POLICY_CHANGE = 0x506,
+       INTEL_GUC_ACTION_UPDATE_SCHEDULING_POLICIES_KLV = 0x509,
        INTEL_GUC_ACTION_SCHED_CONTEXT = 0x1000,
        INTEL_GUC_ACTION_SCHED_CONTEXT_MODE_SET = 0x1001,
        INTEL_GUC_ACTION_SCHED_CONTEXT_MODE_DONE = 0x1002,
index 4c840a2639dc58663501aa22e5a0795693604cf3..811add10c30dc21a357841dccd10e6583468978c 100644 (file)
@@ -128,6 +128,15 @@ enum slpc_media_ratio_mode {
        SLPC_MEDIA_RATIO_MODE_FIXED_ONE_TO_TWO = 2,
 };
 
+enum slpc_gucrc_mode {
+       SLPC_GUCRC_MODE_HW = 0,
+       SLPC_GUCRC_MODE_GUCRC_NO_RC6 = 1,
+       SLPC_GUCRC_MODE_GUCRC_STATIC_TIMEOUT = 2,
+       SLPC_GUCRC_MODE_GUCRC_DYNAMIC_HYSTERESIS = 3,
+
+       SLPC_GUCRC_MODE_MAX,
+};
+
 enum slpc_event_id {
        SLPC_EVENT_RESET = 0,
        SLPC_EVENT_SHUTDOWN = 1,
index 4a59478c3b5c4b07e1cff81246f71906182b9640..58012edd4eb0ecae87d4247880edad01b5e2db93 100644 (file)
 #define GUC_KLV_SELF_CFG_G2H_CTB_SIZE_KEY              0x0907
 #define GUC_KLV_SELF_CFG_G2H_CTB_SIZE_LEN              1u
 
+/*
+ * Global scheduling policy update keys.
+ */
+enum {
+       GUC_SCHEDULING_POLICIES_KLV_ID_RENDER_COMPUTE_YIELD     = 0x1001,
+};
+
 /*
  * Per context scheduling policy update keys.
  */
-enum  {
+enum {
        GUC_CONTEXT_POLICIES_KLV_ID_EXECUTION_QUANTUM                   = 0x2001,
        GUC_CONTEXT_POLICIES_KLV_ID_PREEMPTION_TIMEOUT                  = 0x2002,
        GUC_CONTEXT_POLICIES_KLV_ID_SCHEDULING_PRIORITY                 = 0x2003,
index bac06e3d6f2cc9cd40360763d46eccbc2d5ef39e..27b09ba1d295fc902d26fa6e9bbcb62fed33aadc 100644 (file)
@@ -441,6 +441,7 @@ err_log:
 err_fw:
        intel_uc_fw_fini(&guc->fw);
 out:
+       intel_uc_fw_change_status(&guc->fw, INTEL_UC_FIRMWARE_INIT_FAIL);
        i915_probe_error(gt->i915, "failed with %d\n", ret);
        return ret;
 }
index 804133df1ac9b4ce1b51462b0ac04110f5b94a68..357873ef692bdb468d0f866ae38afe39db2f8818 100644 (file)
@@ -112,6 +112,10 @@ struct intel_guc {
                 * refs
                 */
                struct list_head guc_id_list;
+               /**
+                * @guc_ids_in_use: Number of single-lrc guc_ids in use
+                */
+               unsigned int guc_ids_in_use;
                /**
                 * @destroyed_contexts: list of contexts waiting to be destroyed
                 * (deregistered with the GuC)
@@ -132,6 +136,16 @@ struct intel_guc {
                 * @reset_fail_mask: mask of engines that failed to reset
                 */
                intel_engine_mask_t reset_fail_mask;
+               /**
+                * @sched_disable_delay_ms: schedule disable delay, in ms, for
+                * contexts
+                */
+               unsigned int sched_disable_delay_ms;
+               /**
+                * @sched_disable_gucid_threshold: threshold of min remaining available
+                * guc_ids before we start bypassing the schedule disable delay
+                */
+               unsigned int sched_disable_gucid_threshold;
        } submission_state;
 
        /**
@@ -466,4 +480,6 @@ void intel_guc_write_barrier(struct intel_guc *guc);
 
 void intel_guc_dump_time_info(struct intel_guc *guc, struct drm_printer *p);
 
+int intel_guc_sched_disable_gucid_threshold_max(struct intel_guc *guc);
+
 #endif
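A hedged sketch, not part of the patch: based purely on the kernel-doc above, the delayed schedule-disable is presumably bypassed once the number of free single-lrc guc_ids falls below the new threshold, roughly as follows; the helper and its guc_ids_available parameter are hypothetical.

static bool sketch_bypass_sched_disable_delay(struct intel_guc *guc,
                                              unsigned int guc_ids_available)
{
        /* guc_ids_available: however many single-lrc guc_ids remain unallocated */
        return guc->submission_state.sched_disable_delay_ms == 0 ||
               guc_ids_available < guc->submission_state.sched_disable_gucid_threshold;
}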
index 74cbe8eaf531884f5e27fda2f413d427c6934613..a419d60166c8782e29a385eb69f9f6d2d5b75c69 100644 (file)
@@ -5,6 +5,7 @@
 
 #include <linux/bsearch.h>
 
+#include "gem/i915_gem_lmem.h"
 #include "gt/intel_engine_regs.h"
 #include "gt/intel_gt.h"
 #include "gt/intel_gt_mcr.h"
@@ -277,24 +278,16 @@ __mmio_reg_add(struct temp_regset *regset, struct guc_mmio_reg *reg)
        return slot;
 }
 
-#define GUC_REGSET_STEERING(group, instance) ( \
-       FIELD_PREP(GUC_REGSET_STEERING_GROUP, (group)) | \
-       FIELD_PREP(GUC_REGSET_STEERING_INSTANCE, (instance)) | \
-       GUC_REGSET_NEEDS_STEERING \
-)
-
 static long __must_check guc_mmio_reg_add(struct intel_gt *gt,
                                          struct temp_regset *regset,
-                                         i915_reg_t reg, u32 flags)
+                                         u32 offset, u32 flags)
 {
        u32 count = regset->storage_used - (regset->registers - regset->storage);
-       u32 offset = i915_mmio_reg_offset(reg);
        struct guc_mmio_reg entry = {
                .offset = offset,
                .flags = flags,
        };
        struct guc_mmio_reg *slot;
-       u8 group, inst;
 
        /*
         * The mmio list is built using separate lists within the driver.
@@ -306,17 +299,6 @@ static long __must_check guc_mmio_reg_add(struct intel_gt *gt,
                    sizeof(entry), guc_mmio_reg_cmp))
                return 0;
 
-       /*
-        * The GuC doesn't have a default steering, so we need to explicitly
-        * steer all registers that need steering. However, we do not keep track
-        * of all the steering ranges, only of those that have a chance of using
-        * a non-default steering from the i915 pov. Instead of adding such
-        * tracking, it is easier to just program the default steering for all
-        * regs that don't need a non-default one.
-        */
-       intel_gt_mcr_get_nonterminated_steering(gt, reg, &group, &inst);
-       entry.flags |= GUC_REGSET_STEERING(group, inst);
-
        slot = __mmio_reg_add(regset, &entry);
        if (IS_ERR(slot))
                return PTR_ERR(slot);
@@ -334,6 +316,38 @@ static long __must_check guc_mmio_reg_add(struct intel_gt *gt,
 
 #define GUC_MMIO_REG_ADD(gt, regset, reg, masked) \
        guc_mmio_reg_add(gt, \
+                        regset, \
+                        i915_mmio_reg_offset(reg), \
+                        (masked) ? GUC_REGSET_MASKED : 0)
+
+#define GUC_REGSET_STEERING(group, instance) ( \
+       FIELD_PREP(GUC_REGSET_STEERING_GROUP, (group)) | \
+       FIELD_PREP(GUC_REGSET_STEERING_INSTANCE, (instance)) | \
+       GUC_REGSET_NEEDS_STEERING \
+)
+
+static long __must_check guc_mcr_reg_add(struct intel_gt *gt,
+                                        struct temp_regset *regset,
+                                        i915_mcr_reg_t reg, u32 flags)
+{
+       u8 group, inst;
+
+       /*
+        * The GuC doesn't have a default steering, so we need to explicitly
+        * steer all registers that need steering. However, we do not keep track
+        * of all the steering ranges, only of those that have a chance of using
+        * a non-default steering from the i915 pov. Instead of adding such
+        * tracking, it is easier to just program the default steering for all
+        * regs that don't need a non-default one.
+        */
+       intel_gt_mcr_get_nonterminated_steering(gt, reg, &group, &inst);
+       flags |= GUC_REGSET_STEERING(group, inst);
+
+       return guc_mmio_reg_add(gt, regset, i915_mmio_reg_offset(reg), flags);
+}
+
+#define GUC_MCR_REG_ADD(gt, regset, reg, masked) \
+       guc_mcr_reg_add(gt, \
                         regset, \
                         (reg), \
                         (masked) ? GUC_REGSET_MASKED : 0)
@@ -372,8 +386,21 @@ static int guc_mmio_regset_init(struct temp_regset *regset,
                                        false);
 
        /* add in local MOCS registers */
-       for (i = 0; i < GEN9_LNCFCMOCS_REG_COUNT; i++)
-               ret |= GUC_MMIO_REG_ADD(gt, regset, GEN9_LNCFCMOCS(i), false);
+       for (i = 0; i < LNCFCMOCS_REG_COUNT; i++)
+               if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 50))
+                       ret |= GUC_MCR_REG_ADD(gt, regset, XEHP_LNCFCMOCS(i), false);
+               else
+                       ret |= GUC_MMIO_REG_ADD(gt, regset, GEN9_LNCFCMOCS(i), false);
+
+       if (GRAPHICS_VER(engine->i915) >= 12) {
+               ret |= GUC_MMIO_REG_ADD(gt, regset, EU_PERF_CNTL0, false);
+               ret |= GUC_MMIO_REG_ADD(gt, regset, EU_PERF_CNTL1, false);
+               ret |= GUC_MMIO_REG_ADD(gt, regset, EU_PERF_CNTL2, false);
+               ret |= GUC_MMIO_REG_ADD(gt, regset, EU_PERF_CNTL3, false);
+               ret |= GUC_MMIO_REG_ADD(gt, regset, EU_PERF_CNTL4, false);
+               ret |= GUC_MMIO_REG_ADD(gt, regset, EU_PERF_CNTL5, false);
+               ret |= GUC_MMIO_REG_ADD(gt, regset, EU_PERF_CNTL6, false);
+       }
 
        return ret ? -1 : 0;
 }
index 8f116514601318aa6a62f75c39f36509a7a75d54..4e6dca707d941d78a66b26d52bde3ef49e47ca78 100644 (file)
@@ -169,6 +169,8 @@ static struct __guc_mmio_reg_descr_group default_lists[] = {
        MAKE_REGLIST(default_global_regs, PF, GLOBAL, 0),
        MAKE_REGLIST(default_rc_class_regs, PF, ENGINE_CLASS, GUC_RENDER_CLASS),
        MAKE_REGLIST(xe_lpd_rc_inst_regs, PF, ENGINE_INSTANCE, GUC_RENDER_CLASS),
+       MAKE_REGLIST(default_rc_class_regs, PF, ENGINE_CLASS, GUC_COMPUTE_CLASS),
+       MAKE_REGLIST(xe_lpd_rc_inst_regs, PF, ENGINE_INSTANCE, GUC_COMPUTE_CLASS),
        MAKE_REGLIST(empty_regs_list, PF, ENGINE_CLASS, GUC_VIDEO_CLASS),
        MAKE_REGLIST(xe_lpd_vd_inst_regs, PF, ENGINE_INSTANCE, GUC_VIDEO_CLASS),
        MAKE_REGLIST(empty_regs_list, PF, ENGINE_CLASS, GUC_VIDEOENHANCE_CLASS),
@@ -182,6 +184,8 @@ static const struct __guc_mmio_reg_descr_group xe_lpd_lists[] = {
        MAKE_REGLIST(xe_lpd_global_regs, PF, GLOBAL, 0),
        MAKE_REGLIST(xe_lpd_rc_class_regs, PF, ENGINE_CLASS, GUC_RENDER_CLASS),
        MAKE_REGLIST(xe_lpd_rc_inst_regs, PF, ENGINE_INSTANCE, GUC_RENDER_CLASS),
+       MAKE_REGLIST(xe_lpd_rc_class_regs, PF, ENGINE_CLASS, GUC_COMPUTE_CLASS),
+       MAKE_REGLIST(xe_lpd_rc_inst_regs, PF, ENGINE_INSTANCE, GUC_COMPUTE_CLASS),
        MAKE_REGLIST(empty_regs_list, PF, ENGINE_CLASS, GUC_VIDEO_CLASS),
        MAKE_REGLIST(xe_lpd_vd_inst_regs, PF, ENGINE_INSTANCE, GUC_VIDEO_CLASS),
        MAKE_REGLIST(xe_lpd_vec_class_regs, PF, ENGINE_CLASS, GUC_VIDEOENHANCE_CLASS),
@@ -240,19 +244,19 @@ static void guc_capture_free_extlists(struct __guc_mmio_reg_descr_group *reglist
 
 struct __ext_steer_reg {
        const char *name;
-       i915_reg_t reg;
+       i915_mcr_reg_t reg;
 };
 
 static const struct __ext_steer_reg xe_extregs[] = {
-       {"GEN7_SAMPLER_INSTDONE", GEN7_SAMPLER_INSTDONE},
-       {"GEN7_ROW_INSTDONE", GEN7_ROW_INSTDONE}
+       {"GEN8_SAMPLER_INSTDONE", GEN8_SAMPLER_INSTDONE},
+       {"GEN8_ROW_INSTDONE", GEN8_ROW_INSTDONE}
 };
 
 static void __fill_ext_reg(struct __guc_mmio_reg_descr *ext,
                           const struct __ext_steer_reg *extlist,
                           int slice_id, int subslice_id)
 {
-       ext->reg = extlist->reg;
+       ext->reg = _MMIO(i915_mmio_reg_offset(extlist->reg));
        ext->flags = FIELD_PREP(GUC_REGSET_STEERING_GROUP, slice_id);
        ext->flags |= FIELD_PREP(GUC_REGSET_STEERING_INSTANCE, subslice_id);
        ext->regname = extlist->name;
@@ -419,6 +423,44 @@ guc_capture_get_device_reglist(struct intel_guc *guc)
        return default_lists;
 }
 
+static const char *
+__stringify_type(u32 type)
+{
+       switch (type) {
+       case GUC_CAPTURE_LIST_TYPE_GLOBAL:
+               return "Global";
+       case GUC_CAPTURE_LIST_TYPE_ENGINE_CLASS:
+               return "Class";
+       case GUC_CAPTURE_LIST_TYPE_ENGINE_INSTANCE:
+               return "Instance";
+       default:
+               break;
+       }
+
+       return "unknown";
+}
+
+static const char *
+__stringify_engclass(u32 class)
+{
+       switch (class) {
+       case GUC_RENDER_CLASS:
+               return "Render";
+       case GUC_VIDEO_CLASS:
+               return "Video";
+       case GUC_VIDEOENHANCE_CLASS:
+               return "VideoEnhance";
+       case GUC_BLITTER_CLASS:
+               return "Blitter";
+       case GUC_COMPUTE_CLASS:
+               return "Compute";
+       default:
+               break;
+       }
+
+       return "unknown";
+}
+
 static int
 guc_capture_list_init(struct intel_guc *guc, u32 owner, u32 type, u32 classid,
                      struct guc_mmio_reg *ptr, u16 num_entries)
@@ -482,32 +524,55 @@ guc_cap_list_num_regs(struct intel_guc_state_capture *gc, u32 owner, u32 type, u
        return num_regs;
 }
 
-int
-intel_guc_capture_getlistsize(struct intel_guc *guc, u32 owner, u32 type, u32 classid,
-                             size_t *size)
+static int
+guc_capture_getlistsize(struct intel_guc *guc, u32 owner, u32 type, u32 classid,
+                       size_t *size, bool is_purpose_est)
 {
        struct intel_guc_state_capture *gc = guc->capture;
+       struct drm_i915_private *i915 = guc_to_gt(guc)->i915;
        struct __guc_capture_ads_cache *cache = &gc->ads_cache[owner][type][classid];
        int num_regs;
 
-       if (!gc->reglists)
+       if (!gc->reglists) {
+               drm_warn(&i915->drm, "GuC-capture: No reglist on this device\n");
                return -ENODEV;
+       }
 
        if (cache->is_valid) {
                *size = cache->size;
                return cache->status;
        }
 
+       if (!is_purpose_est && owner == GUC_CAPTURE_LIST_INDEX_PF &&
+           !guc_capture_get_one_list(gc->reglists, owner, type, classid)) {
+               if (type == GUC_CAPTURE_LIST_TYPE_GLOBAL)
+                       drm_warn(&i915->drm, "Missing GuC-Err-Cap reglist Global!\n");
+               else
+                       drm_warn(&i915->drm, "Missing GuC-Err-Cap reglist %s(%u):%s(%u)!\n",
+                                __stringify_type(type), type,
+                                __stringify_engclass(classid), classid);
+               return -ENODATA;
+       }
+
        num_regs = guc_cap_list_num_regs(gc, owner, type, classid);
+       /* intentional empty lists can exist depending on hw config */
        if (!num_regs)
                return -ENODATA;
 
-       *size = PAGE_ALIGN((sizeof(struct guc_debug_capture_list)) +
-                          (num_regs * sizeof(struct guc_mmio_reg)));
+       if (size)
+               *size = PAGE_ALIGN((sizeof(struct guc_debug_capture_list)) +
+                                  (num_regs * sizeof(struct guc_mmio_reg)));
 
        return 0;
 }
 
+int
+intel_guc_capture_getlistsize(struct intel_guc *guc, u32 owner, u32 type, u32 classid,
+                             size_t *size)
+{
+       return guc_capture_getlistsize(guc, owner, type, classid, size, false);
+}
+
 static void guc_capture_create_prealloc_nodes(struct intel_guc *guc);
 
 int
@@ -606,7 +671,7 @@ guc_capture_output_min_size_est(struct intel_guc *guc)
        struct intel_gt *gt = guc_to_gt(guc);
        struct intel_engine_cs *engine;
        enum intel_engine_id id;
-       int worst_min_size = 0, num_regs = 0;
+       int worst_min_size = 0;
        size_t tmp = 0;
 
        if (!guc->capture)
@@ -627,21 +692,19 @@ guc_capture_output_min_size_est(struct intel_guc *guc)
                worst_min_size += sizeof(struct guc_state_capture_group_header_t) +
                                         (3 * sizeof(struct guc_state_capture_header_t));
 
-               if (!intel_guc_capture_getlistsize(guc, 0, GUC_CAPTURE_LIST_TYPE_GLOBAL, 0, &tmp))
-                       num_regs += tmp;
+               if (!guc_capture_getlistsize(guc, 0, GUC_CAPTURE_LIST_TYPE_GLOBAL, 0, &tmp, true))
+                       worst_min_size += tmp;
 
-               if (!intel_guc_capture_getlistsize(guc, 0, GUC_CAPTURE_LIST_TYPE_ENGINE_CLASS,
-                                                  engine->class, &tmp)) {
-                       num_regs += tmp;
+               if (!guc_capture_getlistsize(guc, 0, GUC_CAPTURE_LIST_TYPE_ENGINE_CLASS,
+                                            engine->class, &tmp, true)) {
+                       worst_min_size += tmp;
                }
-               if (!intel_guc_capture_getlistsize(guc, 0, GUC_CAPTURE_LIST_TYPE_ENGINE_INSTANCE,
-                                                  engine->class, &tmp)) {
-                       num_regs += tmp;
+               if (!guc_capture_getlistsize(guc, 0, GUC_CAPTURE_LIST_TYPE_ENGINE_INSTANCE,
+                                            engine->class, &tmp, true)) {
+                       worst_min_size += tmp;
                }
        }
 
-       worst_min_size += (num_regs * sizeof(struct guc_mmio_reg));
-
        return worst_min_size;
 }
 
@@ -658,15 +721,23 @@ static void check_guc_capture_size(struct intel_guc *guc)
        int spare_size = min_size * GUC_CAPTURE_OVERBUFFER_MULTIPLIER;
        u32 buffer_size = intel_guc_log_section_size_capture(&guc->log);
 
+       /*
+        * NOTE: min_size is much smaller than the capture region allocation (DG2: <80K vs 1MB)
+        * Additionally, it's based on the space needed to fit all engines getting reset at once
+        * within the same G2H handler task slot. This is very unlikely. However, if GuC really
+        * does run out of space for whatever reason, we will see a separate warning message
+        * when processing the G2H event capture-notification, search for:
+        * INTEL_GUC_STATE_CAPTURE_EVENT_STATUS_NOSPACE.
+        */
        if (min_size < 0)
                drm_warn(&i915->drm, "Failed to calculate GuC error state capture buffer minimum size: %d!\n",
                         min_size);
        else if (min_size > buffer_size)
-               drm_warn(&i915->drm, "GuC error state capture buffer is too small: %d < %d\n",
+               drm_warn(&i915->drm, "GuC error state capture buffer may be too small: %d < %d\n",
                         buffer_size, min_size);
        else if (spare_size > buffer_size)
-               drm_notice(&i915->drm, "GuC error state capture buffer maybe too small: %d < %d (min = %d)\n",
-                          buffer_size, spare_size, min_size);
+               drm_dbg(&i915->drm, "GuC error state capture buffer lacks spare size: %d < %d (min = %d)\n",
+                       buffer_size, spare_size, min_size);
 }
 
 /*
index 25f09a420561bf410c52b18e5b3b2b6c8020830b..7269eb0bbedf51714d0b09a40ea4e8cec63bd646 100644 (file)
@@ -71,12 +71,73 @@ static bool intel_eval_slpc_support(void *data)
        return intel_guc_slpc_is_used(guc);
 }
 
+static int guc_sched_disable_delay_ms_get(void *data, u64 *val)
+{
+       struct intel_guc *guc = data;
+
+       if (!intel_guc_submission_is_used(guc))
+               return -ENODEV;
+
+       *val = (u64)guc->submission_state.sched_disable_delay_ms;
+
+       return 0;
+}
+
+static int guc_sched_disable_delay_ms_set(void *data, u64 val)
+{
+       struct intel_guc *guc = data;
+
+       if (!intel_guc_submission_is_used(guc))
+               return -ENODEV;
+
+       /* clamp to a practical limit; 1 minute is a reasonable longest delay */
+       guc->submission_state.sched_disable_delay_ms = min_t(u64, val, 60000);
+
+       return 0;
+}
+DEFINE_SIMPLE_ATTRIBUTE(guc_sched_disable_delay_ms_fops,
+                       guc_sched_disable_delay_ms_get,
+                       guc_sched_disable_delay_ms_set, "%lld\n");
+
+static int guc_sched_disable_gucid_threshold_get(void *data, u64 *val)
+{
+       struct intel_guc *guc = data;
+
+       if (!intel_guc_submission_is_used(guc))
+               return -ENODEV;
+
+       *val = guc->submission_state.sched_disable_gucid_threshold;
+       return 0;
+}
+
+static int guc_sched_disable_gucid_threshold_set(void *data, u64 val)
+{
+       struct intel_guc *guc = data;
+
+       if (!intel_guc_submission_is_used(guc))
+               return -ENODEV;
+
+       if (val > intel_guc_sched_disable_gucid_threshold_max(guc))
+               guc->submission_state.sched_disable_gucid_threshold =
+                       intel_guc_sched_disable_gucid_threshold_max(guc);
+       else
+               guc->submission_state.sched_disable_gucid_threshold = val;
+
+       return 0;
+}
+DEFINE_SIMPLE_ATTRIBUTE(guc_sched_disable_gucid_threshold_fops,
+                       guc_sched_disable_gucid_threshold_get,
+                       guc_sched_disable_gucid_threshold_set, "%lld\n");
+
 void intel_guc_debugfs_register(struct intel_guc *guc, struct dentry *root)
 {
        static const struct intel_gt_debugfs_file files[] = {
                { "guc_info", &guc_info_fops, NULL },
                { "guc_registered_contexts", &guc_registered_contexts_fops, NULL },
                { "guc_slpc_info", &guc_slpc_info_fops, &intel_eval_slpc_support},
+               { "guc_sched_disable_delay_ms", &guc_sched_disable_delay_ms_fops, NULL },
+               { "guc_sched_disable_gucid_threshold", &guc_sched_disable_gucid_threshold_fops,
+                  NULL },
        };
 
        if (!intel_guc_is_supported(guc))
index a0372735cddb1d7b84e89762ff3a100182ee0ed6..5b86b2e286e0703e85dcf1f52492f83d59c42045 100644 (file)
  */
 
 #include "gt/intel_gt.h"
+#include "gt/intel_gt_mcr.h"
 #include "gt/intel_gt_regs.h"
 #include "intel_guc_fw.h"
 #include "i915_drv.h"
 
-static void guc_prepare_xfer(struct intel_uncore *uncore)
+static void guc_prepare_xfer(struct intel_gt *gt)
 {
+       struct intel_uncore *uncore = gt->uncore;
+
        u32 shim_flags = GUC_ENABLE_READ_CACHE_LOGIC |
                         GUC_ENABLE_READ_CACHE_FOR_SRAM_DATA |
                         GUC_ENABLE_READ_CACHE_FOR_WOPCM_DATA |
@@ -35,8 +38,9 @@ static void guc_prepare_xfer(struct intel_uncore *uncore)
 
        if (GRAPHICS_VER(uncore->i915) == 9) {
                /* DOP Clock Gating Enable for GuC clocks */
-               intel_uncore_rmw(uncore, GEN7_MISCCPCTL,
-                                0, GEN8_DOP_CLOCK_GATE_GUC_ENABLE);
+               intel_gt_mcr_multicast_write(gt, GEN8_MISCCPCTL,
+                                            GEN8_DOP_CLOCK_GATE_GUC_ENABLE |
+                                            intel_gt_mcr_read_any(gt, GEN8_MISCCPCTL));
 
                /* allows for 5us (in 10ns units) before GT can go to RC6 */
                intel_uncore_write(uncore, GUC_ARAT_C6DIS, 0x1FF);
@@ -168,7 +172,7 @@ int intel_guc_fw_upload(struct intel_guc *guc)
        struct intel_uncore *uncore = gt->uncore;
        int ret;
 
-       guc_prepare_xfer(uncore);
+       guc_prepare_xfer(gt);
 
        /*
         * Note that GuC needs the CSS header plus uKernel code to be copied
index 323b055e5db97e9c8543a95c4347bc278b9b869e..968ebd79dce70edd30c87117f61767a1d7d099cb 100644 (file)
@@ -290,6 +290,25 @@ struct guc_update_context_policy {
        struct guc_klv_generic_dw_t klv[GUC_CONTEXT_POLICIES_KLV_NUM_IDS];
 } __packed;
 
+/* Format of the UPDATE_SCHEDULING_POLICIES H2G data packet */
+struct guc_update_scheduling_policy_header {
+       u32 action;
+} __packed;
+
+/*
+ * Can't dynamically allocate memory for the scheduling policy KLV because
+ * it will be sent from within the reset path. Need a fixed size lump on
+ * the stack instead :(.
+ *
+ * Currently, there is only one KLV defined, which has 1 word of KL + 2 words of V.
+ */
+#define MAX_SCHEDULING_POLICY_SIZE 3
+
+struct guc_update_scheduling_policy {
+       struct guc_update_scheduling_policy_header header;
+       u32 data[MAX_SCHEDULING_POLICY_SIZE];
+} __packed;
+
 #define GUC_POWER_UNSPECIFIED  0
 #define GUC_POWER_D0           1
 #define GUC_POWER_D1           2
@@ -298,6 +317,9 @@ struct guc_update_context_policy {
 
 /* Scheduling policy settings */
 
+#define GLOBAL_SCHEDULE_POLICY_RC_YIELD_DURATION       100     /* in ms */
+#define GLOBAL_SCHEDULE_POLICY_RC_YIELD_RATIO          50      /* in percent */
+
 #define GLOBAL_POLICY_MAX_NUM_WI 15
 
 /* Don't reset an engine upon preemption failure */
@@ -305,6 +327,27 @@ struct guc_update_context_policy {
 
 #define GLOBAL_POLICY_DEFAULT_DPC_PROMOTE_TIME_US 500000
 
+/*
+ * GuC converts the timeout to clock ticks internally. Different platforms have
+ * different GuC clocks. Thus, the maximum value before overflow is platform
+ * dependent. Current worst case scenario is about 110s. So, the spec says to
+ * limit to 100s to be safe.
+ */
+#define GUC_POLICY_MAX_EXEC_QUANTUM_US         (100 * 1000 * 1000UL)
+#define GUC_POLICY_MAX_PREEMPT_TIMEOUT_US      (100 * 1000 * 1000UL)
+
+static inline u32 guc_policy_max_exec_quantum_ms(void)
+{
+       BUILD_BUG_ON(GUC_POLICY_MAX_EXEC_QUANTUM_US >= UINT_MAX);
+       return GUC_POLICY_MAX_EXEC_QUANTUM_US / 1000;
+}
+
+static inline u32 guc_policy_max_preempt_timeout_ms(void)
+{
+       BUILD_BUG_ON(GUC_POLICY_MAX_PREEMPT_TIMEOUT_US >= UINT_MAX);
+       return GUC_POLICY_MAX_PREEMPT_TIMEOUT_US / 1000;
+}
+
 struct guc_policies {
        u32 submission_queue_depth[GUC_MAX_ENGINE_CLASSES];
        /* In micro seconds. How much time to allow before DPC processing is
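The guc_policy_max_exec_quantum_ms()/guc_policy_max_preempt_timeout_ms() helpers above only convert the overflow limits into milliseconds; the clamping of the per-engine properties happens elsewhere in the series. Below is a minimal editor's sketch of that clamp, assuming it operates on the same engine->props fields used in the policy-init hunks further down (the function name is invented for illustration):

static void clamp_engine_policy_props(struct intel_engine_cs *engine)
{
        /* keep the ms values small enough that the *1000 us conversion cannot overflow a u32 */
        if (engine->props.timeslice_duration_ms > guc_policy_max_exec_quantum_ms())
                engine->props.timeslice_duration_ms = guc_policy_max_exec_quantum_ms();

        if (engine->props.preempt_timeout_ms > guc_policy_max_preempt_timeout_ms())
                engine->props.preempt_timeout_ms = guc_policy_max_preempt_timeout_ms();
}

Clamping like this keeps the GEM_BUG_ON(overflows_type(...)) checks added to guc_context_policy_init_v70()/v69() from firing for oversized module-parameter or sysfs values.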
index 55d3ef93e86f856b4e04f10f249af80240bc71d7..68331c538b0a7959425fc914a0facf391d9713f6 100644 (file)
 #if defined(CONFIG_DRM_I915_DEBUG_GUC)
 #define GUC_LOG_DEFAULT_CRASH_BUFFER_SIZE      SZ_2M
 #define GUC_LOG_DEFAULT_DEBUG_BUFFER_SIZE      SZ_16M
-#define GUC_LOG_DEFAULT_CAPTURE_BUFFER_SIZE    SZ_4M
+#define GUC_LOG_DEFAULT_CAPTURE_BUFFER_SIZE    SZ_1M
 #elif defined(CONFIG_DRM_I915_DEBUG_GEM)
 #define GUC_LOG_DEFAULT_CRASH_BUFFER_SIZE      SZ_1M
 #define GUC_LOG_DEFAULT_DEBUG_BUFFER_SIZE      SZ_2M
-#define GUC_LOG_DEFAULT_CAPTURE_BUFFER_SIZE    SZ_4M
+#define GUC_LOG_DEFAULT_CAPTURE_BUFFER_SIZE    SZ_1M
 #else
 #define GUC_LOG_DEFAULT_CRASH_BUFFER_SIZE      SZ_8K
 #define GUC_LOG_DEFAULT_DEBUG_BUFFER_SIZE      SZ_64K
-#define GUC_LOG_DEFAULT_CAPTURE_BUFFER_SIZE    SZ_2M
+#define GUC_LOG_DEFAULT_CAPTURE_BUFFER_SIZE    SZ_1M
 #endif
 
 static void guc_log_copy_debuglogs_for_relay(struct intel_guc_log *log);
index fdd895f73f9f1f51b0a5d4cfa8a93e0801a40aec..63464933cbcebb72e3b4a13ff21f5b57e35f67c1 100644 (file)
@@ -137,6 +137,17 @@ static int guc_action_slpc_set_param(struct intel_guc *guc, u8 id, u32 value)
        return ret > 0 ? -EPROTO : ret;
 }
 
+static int guc_action_slpc_unset_param(struct intel_guc *guc, u8 id)
+{
+       u32 request[] = {
+               GUC_ACTION_HOST2GUC_PC_SLPC_REQUEST,
+               SLPC_EVENT(SLPC_EVENT_PARAMETER_UNSET, 1),
+               id,
+       };
+
+       return intel_guc_send(guc, request, ARRAY_SIZE(request));
+}
+
 static bool slpc_is_running(struct intel_guc_slpc *slpc)
 {
        return slpc_get_state(slpc) == SLPC_GLOBAL_STATE_RUNNING;
@@ -190,6 +201,15 @@ static int slpc_set_param(struct intel_guc_slpc *slpc, u8 id, u32 value)
        return ret;
 }
 
+static int slpc_unset_param(struct intel_guc_slpc *slpc, u8 id)
+{
+       struct intel_guc *guc = slpc_to_guc(slpc);
+
+       GEM_BUG_ON(id >= SLPC_MAX_PARAM);
+
+       return guc_action_slpc_unset_param(guc, id);
+}
+
 static int slpc_force_min_freq(struct intel_guc_slpc *slpc, u32 freq)
 {
        struct drm_i915_private *i915 = slpc_to_i915(slpc);
@@ -263,6 +283,7 @@ int intel_guc_slpc_init(struct intel_guc_slpc *slpc)
 
        slpc->max_freq_softlimit = 0;
        slpc->min_freq_softlimit = 0;
+       slpc->min_is_rpmax = false;
 
        slpc->boost_freq = 0;
        atomic_set(&slpc->num_waiters, 0);
@@ -588,6 +609,39 @@ static int slpc_set_softlimits(struct intel_guc_slpc *slpc)
        return 0;
 }
 
+static bool is_slpc_min_freq_rpmax(struct intel_guc_slpc *slpc)
+{
+       struct drm_i915_private *i915 = slpc_to_i915(slpc);
+       int slpc_min_freq;
+       int ret;
+
+       ret = intel_guc_slpc_get_min_freq(slpc, &slpc_min_freq);
+       if (ret) {
+               drm_err(&i915->drm,
+                       "Failed to get min freq: (%d)\n",
+                       ret);
+               return false;
+       }
+
+       if (slpc_min_freq == SLPC_MAX_FREQ_MHZ)
+               return true;
+       else
+               return false;
+}
+
+static void update_server_min_softlimit(struct intel_guc_slpc *slpc)
+{
+       /*
+        * For server parts, SLPC min will be at RPMax.
+        * Use min softlimit to clamp it to RP0 instead.
+        */
+       if (!slpc->min_freq_softlimit &&
+           is_slpc_min_freq_rpmax(slpc)) {
+               slpc->min_is_rpmax = true;
+               slpc->min_freq_softlimit = slpc->rp0_freq;
+               (slpc_to_gt(slpc))->defaults.min_freq = slpc->min_freq_softlimit;
+       }
+}
+
 static int slpc_use_fused_rp0(struct intel_guc_slpc *slpc)
 {
        /* Force SLPC to used platform rp0 */
@@ -610,6 +664,52 @@ static void slpc_get_rp_values(struct intel_guc_slpc *slpc)
                slpc->boost_freq = slpc->rp0_freq;
 }
 
+/**
+ * intel_guc_slpc_override_gucrc_mode() - override GUCRC mode
+ * @slpc: pointer to intel_guc_slpc.
+ * @mode: new value of the mode.
+ *
+ * This function will override the GUCRC mode.
+ *
+ * Return: 0 on success, non-zero error code on failure.
+ */
+int intel_guc_slpc_override_gucrc_mode(struct intel_guc_slpc *slpc, u32 mode)
+{
+       int ret;
+       struct drm_i915_private *i915 = slpc_to_i915(slpc);
+       intel_wakeref_t wakeref;
+
+       if (mode >= SLPC_GUCRC_MODE_MAX)
+               return -EINVAL;
+
+       with_intel_runtime_pm(&i915->runtime_pm, wakeref) {
+               ret = slpc_set_param(slpc, SLPC_PARAM_PWRGATE_RC_MODE, mode);
+               if (ret)
+                       drm_err(&i915->drm,
+                               "Override gucrc mode %d failed %d\n",
+                               mode, ret);
+       }
+
+       return ret;
+}
+
+int intel_guc_slpc_unset_gucrc_mode(struct intel_guc_slpc *slpc)
+{
+       struct drm_i915_private *i915 = slpc_to_i915(slpc);
+       intel_wakeref_t wakeref;
+       int ret = 0;
+
+       with_intel_runtime_pm(&i915->runtime_pm, wakeref) {
+               ret = slpc_unset_param(slpc, SLPC_PARAM_PWRGATE_RC_MODE);
+               if (ret)
+                       drm_err(&i915->drm,
+                               "Unsetting gucrc mode failed %d\n",
+                               ret);
+       }
+
+       return ret;
+}
+
 /*
  * intel_guc_slpc_enable() - Start SLPC
  * @slpc: pointer to intel_guc_slpc.
@@ -647,6 +747,9 @@ int intel_guc_slpc_enable(struct intel_guc_slpc *slpc)
 
        slpc_get_rp_values(slpc);
 
+       /* Handle the case where min=max=RPmax */
+       update_server_min_softlimit(slpc);
+
        /* Set SLPC max limit to RP0 */
        ret = slpc_use_fused_rp0(slpc);
        if (unlikely(ret)) {
index 82a98f78f96c3f71accd8dba6e3d424315f79680..17ed515f6a852bbd4942af15f6860c15882f5614 100644 (file)
@@ -9,6 +9,8 @@
 #include "intel_guc_submission.h"
 #include "intel_guc_slpc_types.h"
 
+#define SLPC_MAX_FREQ_MHZ 4250
+
 struct intel_gt;
 struct drm_printer;
 
@@ -42,5 +44,7 @@ int intel_guc_slpc_set_media_ratio_mode(struct intel_guc_slpc *slpc, u32 val);
 void intel_guc_pm_intrmsk_enable(struct intel_gt *gt);
 void intel_guc_slpc_boost(struct intel_guc_slpc *slpc);
 void intel_guc_slpc_dec_waiters(struct intel_guc_slpc *slpc);
+int intel_guc_slpc_unset_gucrc_mode(struct intel_guc_slpc *slpc);
+int intel_guc_slpc_override_gucrc_mode(struct intel_guc_slpc *slpc, u32 mode);
 
 #endif
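A minimal usage sketch for the two new exports, assuming a hypothetical caller (e.g. a selftest) that wants to force a GUCRC mode temporarily; only the intel_guc_slpc_override_gucrc_mode()/intel_guc_slpc_unset_gucrc_mode() calls come from the hunks above, the wrapper is invented:

static int with_forced_gucrc_mode(struct intel_guc_slpc *slpc, u32 mode,
                                  int (*body)(void *arg), void *arg)
{
        int ret, err;

        ret = intel_guc_slpc_override_gucrc_mode(slpc, mode);
        if (ret)
                return ret;

        ret = body(arg);

        /* hand RC control back to GuC regardless of the body's result */
        err = intel_guc_slpc_unset_gucrc_mode(slpc);

        return ret ?: err;
}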
index 73d208123528ff740a8b25d1defa75148b04b3dc..a6ef53b04e04758fd828779241c549b302a6fd33 100644 (file)
@@ -19,6 +19,9 @@ struct intel_guc_slpc {
        bool supported;
        bool selected;
 
+       /* Indicates this is a server part */
+       bool min_is_rpmax;
+
        /* platform frequency limits */
        u32 min_freq;
        u32 rp0_freq;
index 1db59eeb34db9e02782b50900d660aff97803ec9..4ccb29f9ac55ca5b59cfadb58d54a7e64f59cf3e 100644 (file)
@@ -6,6 +6,7 @@
 #include <linux/circ_buf.h>
 
 #include "gem/i915_gem_context.h"
+#include "gem/i915_gem_lmem.h"
 #include "gt/gen8_engine_cs.h"
 #include "gt/intel_breadcrumbs.h"
 #include "gt/intel_context.h"
  * corresponding G2H returns indicating the scheduling disable operation has
  * completed it is safe to unpin the context. While a disable is in flight it
  * isn't safe to resubmit the context so a fence is used to stall all future
- * requests of that context until the G2H is returned.
+ * requests of that context until the G2H is returned. Because this interaction
+ * with the GuC takes a non-zero amount of time, we delay the disabling of
+ * scheduling after the pin count goes to zero by a configurable period of time
+ * (see SCHED_DISABLE_DELAY_MS). The idea is that this gives the user a window of
+ * time to resubmit something on the context before doing this costly operation.
+ * This delay is only applied if the context isn't closed and the guc_id usage is
+ * less than a threshold (see NUM_SCHED_DISABLE_GUCIDS_DEFAULT_THRESHOLD).
  *
  * Context deregistration:
  * Before a context can be destroyed or if we steal its guc_id we must
@@ -163,7 +170,8 @@ guc_create_parallel(struct intel_engine_cs **engines,
 #define SCHED_STATE_PENDING_ENABLE                     BIT(5)
 #define SCHED_STATE_REGISTERED                         BIT(6)
 #define SCHED_STATE_POLICY_REQUIRED                    BIT(7)
-#define SCHED_STATE_BLOCKED_SHIFT                      8
+#define SCHED_STATE_CLOSED                             BIT(8)
+#define SCHED_STATE_BLOCKED_SHIFT                      9
 #define SCHED_STATE_BLOCKED            BIT(SCHED_STATE_BLOCKED_SHIFT)
 #define SCHED_STATE_BLOCKED_MASK       (0xfff << SCHED_STATE_BLOCKED_SHIFT)
 
@@ -173,12 +181,20 @@ static inline void init_sched_state(struct intel_context *ce)
        ce->guc_state.sched_state &= SCHED_STATE_BLOCKED_MASK;
 }
 
+/*
+ * Kernel contexts can have SCHED_STATE_REGISTERED after suspend.
+ * A context close can race with the submission path, so SCHED_STATE_CLOSED
+ * can be set immediately before we try to register.
+ */
+#define SCHED_STATE_VALID_INIT \
+       (SCHED_STATE_BLOCKED_MASK | \
+        SCHED_STATE_CLOSED | \
+        SCHED_STATE_REGISTERED)
+
 __maybe_unused
 static bool sched_state_is_init(struct intel_context *ce)
 {
-       /* Kernel contexts can have SCHED_STATE_REGISTERED after suspend. */
-       return !(ce->guc_state.sched_state &
-                ~(SCHED_STATE_BLOCKED_MASK | SCHED_STATE_REGISTERED));
+       return !(ce->guc_state.sched_state & ~SCHED_STATE_VALID_INIT);
 }
 
 static inline bool
@@ -319,6 +335,17 @@ static inline void clr_context_policy_required(struct intel_context *ce)
        ce->guc_state.sched_state &= ~SCHED_STATE_POLICY_REQUIRED;
 }
 
+static inline bool context_close_done(struct intel_context *ce)
+{
+       return ce->guc_state.sched_state & SCHED_STATE_CLOSED;
+}
+
+static inline void set_context_close_done(struct intel_context *ce)
+{
+       lockdep_assert_held(&ce->guc_state.lock);
+       ce->guc_state.sched_state |= SCHED_STATE_CLOSED;
+}
+
 static inline u32 context_blocked(struct intel_context *ce)
 {
        return (ce->guc_state.sched_state & SCHED_STATE_BLOCKED_MASK) >>
@@ -343,25 +370,6 @@ static inline void decr_context_blocked(struct intel_context *ce)
        ce->guc_state.sched_state -= SCHED_STATE_BLOCKED;
 }
 
-static inline bool context_has_committed_requests(struct intel_context *ce)
-{
-       return !!ce->guc_state.number_committed_requests;
-}
-
-static inline void incr_context_committed_requests(struct intel_context *ce)
-{
-       lockdep_assert_held(&ce->guc_state.lock);
-       ++ce->guc_state.number_committed_requests;
-       GEM_BUG_ON(ce->guc_state.number_committed_requests < 0);
-}
-
-static inline void decr_context_committed_requests(struct intel_context *ce)
-{
-       lockdep_assert_held(&ce->guc_state.lock);
-       --ce->guc_state.number_committed_requests;
-       GEM_BUG_ON(ce->guc_state.number_committed_requests < 0);
-}
-
 static struct intel_context *
 request_to_scheduling_context(struct i915_request *rq)
 {
@@ -1067,6 +1075,12 @@ static void scrub_guc_desc_for_outstanding_g2h(struct intel_guc *guc)
 
                xa_unlock(&guc->context_lookup);
 
+               if (test_bit(CONTEXT_GUC_INIT, &ce->flags) &&
+                   (cancel_delayed_work(&ce->guc_state.sched_disable_delay_work))) {
+                       /* successful cancel so jump straight to close it */
+                       intel_context_sched_disable_unpin(ce);
+               }
+
                spin_lock(&ce->guc_state.lock);
 
                /*
@@ -1994,6 +2008,9 @@ static int new_guc_id(struct intel_guc *guc, struct intel_context *ce)
        if (unlikely(ret < 0))
                return ret;
 
+       if (!intel_context_is_parent(ce))
+               ++guc->submission_state.guc_ids_in_use;
+
        ce->guc_id.id = ret;
        return 0;
 }
@@ -2003,14 +2020,16 @@ static void __release_guc_id(struct intel_guc *guc, struct intel_context *ce)
        GEM_BUG_ON(intel_context_is_child(ce));
 
        if (!context_guc_id_invalid(ce)) {
-               if (intel_context_is_parent(ce))
+               if (intel_context_is_parent(ce)) {
                        bitmap_release_region(guc->submission_state.guc_ids_bitmap,
                                              ce->guc_id.id,
                                              order_base_2(ce->parallel.number_children
                                                           + 1));
-               else
+               } else {
+                       --guc->submission_state.guc_ids_in_use;
                        ida_simple_remove(&guc->submission_state.guc_ids,
                                          ce->guc_id.id);
+               }
                clr_ctx_id_mapping(guc, ce->guc_id.id);
                set_context_guc_id_invalid(ce);
        }
@@ -2429,6 +2448,10 @@ static int guc_context_policy_init_v70(struct intel_context *ce, bool loop)
        int ret;
 
        /* NB: For both of these, zero means disabled. */
+       GEM_BUG_ON(overflows_type(engine->props.timeslice_duration_ms * 1000,
+                                 execution_quantum));
+       GEM_BUG_ON(overflows_type(engine->props.preempt_timeout_ms * 1000,
+                                 preemption_timeout));
        execution_quantum = engine->props.timeslice_duration_ms * 1000;
        preemption_timeout = engine->props.preempt_timeout_ms * 1000;
 
@@ -2462,6 +2485,10 @@ static void guc_context_policy_init_v69(struct intel_engine_cs *engine,
                desc->policy_flags |= CONTEXT_POLICY_FLAG_PREEMPT_TO_IDLE_V69;
 
        /* NB: For both of these, zero means disabled. */
+       GEM_BUG_ON(overflows_type(engine->props.timeslice_duration_ms * 1000,
+                                 desc->execution_quantum));
+       GEM_BUG_ON(overflows_type(engine->props.preempt_timeout_ms * 1000,
+                                 desc->preemption_timeout));
        desc->execution_quantum = engine->props.timeslice_duration_ms * 1000;
        desc->preemption_timeout = engine->props.preempt_timeout_ms * 1000;
 }
@@ -2998,41 +3025,104 @@ guc_context_revoke(struct intel_context *ce, struct i915_request *rq,
        }
 }
 
-static void guc_context_sched_disable(struct intel_context *ce)
+static void do_sched_disable(struct intel_guc *guc, struct intel_context *ce,
+                            unsigned long flags)
+       __releases(ce->guc_state.lock)
 {
-       struct intel_guc *guc = ce_to_guc(ce);
-       unsigned long flags;
        struct intel_runtime_pm *runtime_pm = &ce->engine->gt->i915->runtime_pm;
        intel_wakeref_t wakeref;
        u16 guc_id;
 
+       lockdep_assert_held(&ce->guc_state.lock);
+       guc_id = prep_context_pending_disable(ce);
+
+       spin_unlock_irqrestore(&ce->guc_state.lock, flags);
+
+       with_intel_runtime_pm(runtime_pm, wakeref)
+               __guc_context_sched_disable(guc, ce, guc_id);
+}
+
+static bool bypass_sched_disable(struct intel_guc *guc,
+                                struct intel_context *ce)
+{
+       lockdep_assert_held(&ce->guc_state.lock);
        GEM_BUG_ON(intel_context_is_child(ce));
 
+       if (submission_disabled(guc) || context_guc_id_invalid(ce) ||
+           !ctx_id_mapped(guc, ce->guc_id.id)) {
+               clr_context_enabled(ce);
+               return true;
+       }
+
+       return !context_enabled(ce);
+}
+
+static void __delay_sched_disable(struct work_struct *wrk)
+{
+       struct intel_context *ce =
+               container_of(wrk, typeof(*ce), guc_state.sched_disable_delay_work.work);
+       struct intel_guc *guc = ce_to_guc(ce);
+       unsigned long flags;
+
        spin_lock_irqsave(&ce->guc_state.lock, flags);
 
+       if (bypass_sched_disable(guc, ce)) {
+               spin_unlock_irqrestore(&ce->guc_state.lock, flags);
+               intel_context_sched_disable_unpin(ce);
+       } else {
+               do_sched_disable(guc, ce, flags);
+       }
+}
+
+static bool guc_id_pressure(struct intel_guc *guc, struct intel_context *ce)
+{
        /*
-        * We have to check if the context has been disabled by another thread,
-        * check if submssion has been disabled to seal a race with reset and
-        * finally check if any more requests have been committed to the
-        * context ensursing that a request doesn't slip through the
-        * 'context_pending_disable' fence.
+        * Parent contexts are perma-pinned; if we are unpinning, do the schedule
+        * disable immediately.
         */
-       if (unlikely(!context_enabled(ce) || submission_disabled(guc) ||
-                    context_has_committed_requests(ce))) {
-               clr_context_enabled(ce);
+       if (intel_context_is_parent(ce))
+               return true;
+
+       /*
+        * If we are beyond the threshold for available guc_ids, do the schedule disable immediately.
+        */
+       return guc->submission_state.guc_ids_in_use >
+               guc->submission_state.sched_disable_gucid_threshold;
+}
+
+static void guc_context_sched_disable(struct intel_context *ce)
+{
+       struct intel_guc *guc = ce_to_guc(ce);
+       u64 delay = guc->submission_state.sched_disable_delay_ms;
+       unsigned long flags;
+
+       spin_lock_irqsave(&ce->guc_state.lock, flags);
+
+       if (bypass_sched_disable(guc, ce)) {
                spin_unlock_irqrestore(&ce->guc_state.lock, flags);
-               goto unpin;
+               intel_context_sched_disable_unpin(ce);
+       } else if (!intel_context_is_closed(ce) && !guc_id_pressure(guc, ce) &&
+                  delay) {
+               spin_unlock_irqrestore(&ce->guc_state.lock, flags);
+               mod_delayed_work(system_unbound_wq,
+                                &ce->guc_state.sched_disable_delay_work,
+                                msecs_to_jiffies(delay));
+       } else {
+               do_sched_disable(guc, ce, flags);
        }
-       guc_id = prep_context_pending_disable(ce);
+}
 
-       spin_unlock_irqrestore(&ce->guc_state.lock, flags);
+static void guc_context_close(struct intel_context *ce)
+{
+       unsigned long flags;
 
-       with_intel_runtime_pm(runtime_pm, wakeref)
-               __guc_context_sched_disable(guc, ce, guc_id);
+       if (test_bit(CONTEXT_GUC_INIT, &ce->flags) &&
+           cancel_delayed_work(&ce->guc_state.sched_disable_delay_work))
+               __delay_sched_disable(&ce->guc_state.sched_disable_delay_work.work);
 
-       return;
-unpin:
-       intel_context_sched_disable_unpin(ce);
+       spin_lock_irqsave(&ce->guc_state.lock, flags);
+       set_context_close_done(ce);
+       spin_unlock_irqrestore(&ce->guc_state.lock, flags);
 }
 
 static inline void guc_lrc_desc_unpin(struct intel_context *ce)
@@ -3071,7 +3161,6 @@ static void __guc_context_destroy(struct intel_context *ce)
                   ce->guc_state.prio_count[GUC_CLIENT_PRIORITY_HIGH] ||
                   ce->guc_state.prio_count[GUC_CLIENT_PRIORITY_KMD_NORMAL] ||
                   ce->guc_state.prio_count[GUC_CLIENT_PRIORITY_NORMAL]);
-       GEM_BUG_ON(ce->guc_state.number_committed_requests);
 
        lrc_fini(ce);
        intel_context_fini(ce);
@@ -3340,8 +3429,6 @@ static void remove_from_context(struct i915_request *rq)
 
        guc_prio_fini(rq, ce);
 
-       decr_context_committed_requests(ce);
-
        spin_unlock_irq(&ce->guc_state.lock);
 
        atomic_dec(&ce->guc_id.ref);
@@ -3351,6 +3438,8 @@ static void remove_from_context(struct i915_request *rq)
 static const struct intel_context_ops guc_context_ops = {
        .alloc = guc_context_alloc,
 
+       .close = guc_context_close,
+
        .pre_pin = guc_context_pre_pin,
        .pin = guc_context_pin,
        .unpin = guc_context_unpin,
@@ -3433,6 +3522,10 @@ static void guc_context_init(struct intel_context *ce)
        rcu_read_unlock();
 
        ce->guc_state.prio = map_i915_prio_to_guc_prio(prio);
+
+       INIT_DELAYED_WORK(&ce->guc_state.sched_disable_delay_work,
+                         __delay_sched_disable);
+
        set_bit(CONTEXT_GUC_INIT, &ce->flags);
 }
 
@@ -3470,6 +3563,26 @@ static int guc_request_alloc(struct i915_request *rq)
        if (unlikely(!test_bit(CONTEXT_GUC_INIT, &ce->flags)))
                guc_context_init(ce);
 
+       /*
+        * If the context gets closed while the execbuf is ongoing, the context
+        * close code will race with the below code to cancel the delayed work.
+        * If the context close wins the race and cancels the work, it will
+        * immediately call the sched disable (see guc_context_close), so there
+        * is a chance we can get past this check while the sched_disable code
+        * is being executed. To make sure that code completes before we check
+        * the status further down, we wait for the close process to complete.
+        * Otherwise, this code path could send a request down thinking that the
+        * context is still in schedule-enable mode while the GuC ends up
+        * dropping the request entirely, because the disable went from the
+        * context_close path straight to the GuC just before. In the event the CT is
+        * full, we could potentially need to wait up to 1.5 seconds.
+        */
+       if (cancel_delayed_work_sync(&ce->guc_state.sched_disable_delay_work))
+               intel_context_sched_disable_unpin(ce);
+       else if (intel_context_is_closed(ce))
+               if (wait_for(context_close_done(ce), 1500))
+                       drm_warn(&guc_to_gt(guc)->i915->drm,
+                                "timed out waiting on context sched close before realloc\n");
        /*
         * Call pin_guc_id here rather than in the pinning step as with
         * dma_resv, contexts can be repeatedly pinned / unpinned trashing the
@@ -3524,7 +3637,6 @@ out:
 
                list_add_tail(&rq->guc_fence_link, &ce->guc_state.fences);
        }
-       incr_context_committed_requests(ce);
        spin_unlock_irqrestore(&ce->guc_state.lock, flags);
 
        return 0;
@@ -3600,6 +3712,8 @@ static int guc_virtual_context_alloc(struct intel_context *ce)
 static const struct intel_context_ops virtual_guc_context_ops = {
        .alloc = guc_virtual_context_alloc,
 
+       .close = guc_context_close,
+
        .pre_pin = guc_virtual_context_pre_pin,
        .pin = guc_virtual_context_pin,
        .unpin = guc_virtual_context_unpin,
@@ -3689,6 +3803,8 @@ static void guc_child_context_destroy(struct kref *kref)
 static const struct intel_context_ops virtual_parent_context_ops = {
        .alloc = guc_virtual_context_alloc,
 
+       .close = guc_context_close,
+
        .pre_pin = guc_context_pre_pin,
        .pin = guc_parent_context_pin,
        .unpin = guc_parent_context_unpin,
@@ -4093,7 +4209,7 @@ static void guc_default_vfuncs(struct intel_engine_cs *engine)
 
        engine->emit_bb_start = gen8_emit_bb_start;
        if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 50))
-               engine->emit_bb_start = gen125_emit_bb_start;
+               engine->emit_bb_start = xehp_emit_bb_start;
 }
 
 static void rcs_submission_override(struct intel_engine_cs *engine)
@@ -4177,6 +4293,98 @@ int intel_guc_submission_setup(struct intel_engine_cs *engine)
        return 0;
 }
 
+struct scheduling_policy {
+       /* internal data */
+       u32 max_words, num_words;
+       u32 count;
+       /* API data */
+       struct guc_update_scheduling_policy h2g;
+};
+
+static u32 __guc_scheduling_policy_action_size(struct scheduling_policy *policy)
+{
+       u32 *start = (void *)&policy->h2g;
+       u32 *end = policy->h2g.data + policy->num_words;
+       size_t delta = end - start;
+
+       return delta;
+}
+
+static struct scheduling_policy *__guc_scheduling_policy_start_klv(struct scheduling_policy *policy)
+{
+       policy->h2g.header.action = INTEL_GUC_ACTION_UPDATE_SCHEDULING_POLICIES_KLV;
+       policy->max_words = ARRAY_SIZE(policy->h2g.data);
+       policy->num_words = 0;
+       policy->count = 0;
+
+       return policy;
+}
+
+static void __guc_scheduling_policy_add_klv(struct scheduling_policy *policy,
+                                           u32 action, u32 *data, u32 len)
+{
+       u32 *klv_ptr = policy->h2g.data + policy->num_words;
+
+       GEM_BUG_ON((policy->num_words + 1 + len) > policy->max_words);
+       *(klv_ptr++) = FIELD_PREP(GUC_KLV_0_KEY, action) |
+                      FIELD_PREP(GUC_KLV_0_LEN, len);
+       memcpy(klv_ptr, data, sizeof(u32) * len);
+       policy->num_words += 1 + len;
+       policy->count++;
+}
+
+static int __guc_action_set_scheduling_policies(struct intel_guc *guc,
+                                               struct scheduling_policy *policy)
+{
+       int ret;
+
+       ret = intel_guc_send(guc, (u32 *)&policy->h2g,
+                            __guc_scheduling_policy_action_size(policy));
+       if (ret < 0)
+               return ret;
+
+       if (ret != policy->count) {
+               drm_warn(&guc_to_gt(guc)->i915->drm, "GuC global scheduler policy processed %d of %d KLVs!",
+                        ret, policy->count);
+               if (ret > policy->count)
+                       return -EPROTO;
+       }
+
+       return 0;
+}
+
+static int guc_init_global_schedule_policy(struct intel_guc *guc)
+{
+       struct scheduling_policy policy;
+       struct intel_gt *gt = guc_to_gt(guc);
+       intel_wakeref_t wakeref;
+       int ret = 0;
+
+       if (GET_UC_VER(guc) < MAKE_UC_VER(70, 3, 0))
+               return 0;
+
+       __guc_scheduling_policy_start_klv(&policy);
+
+       with_intel_runtime_pm(&gt->i915->runtime_pm, wakeref) {
+               u32 yield[] = {
+                       GLOBAL_SCHEDULE_POLICY_RC_YIELD_DURATION,
+                       GLOBAL_SCHEDULE_POLICY_RC_YIELD_RATIO,
+               };
+
+               __guc_scheduling_policy_add_klv(&policy,
+                                               GUC_SCHEDULING_POLICIES_KLV_ID_RENDER_COMPUTE_YIELD,
+                                               yield, ARRAY_SIZE(yield));
+
+               ret = __guc_action_set_scheduling_policies(guc, &policy);
+               if (ret)
+                       i915_probe_error(gt->i915,
+                                        "Failed to configure global scheduling policies: %pe!\n",
+                                        ERR_PTR(ret));
+       }
+
+       return ret;
+}
+
 void intel_guc_submission_enable(struct intel_guc *guc)
 {
        struct intel_gt *gt = guc_to_gt(guc);
@@ -4189,6 +4397,7 @@ void intel_guc_submission_enable(struct intel_guc *guc)
 
        guc_init_lrc_mapping(guc);
        guc_init_engine_stats(guc);
+       guc_init_global_schedule_policy(guc);
 }
 
 void intel_guc_submission_disable(struct intel_guc *guc)
@@ -4219,6 +4428,26 @@ static bool __guc_submission_selected(struct intel_guc *guc)
        return i915->params.enable_guc & ENABLE_GUC_SUBMISSION;
 }
 
+int intel_guc_sched_disable_gucid_threshold_max(struct intel_guc *guc)
+{
+       return guc->submission_state.num_guc_ids - NUMBER_MULTI_LRC_GUC_ID(guc);
+}
+
+/*
+ * This default value of 33 milliseconds (+1 millisecond round-up) ensures 30fps or higher
+ * workloads are able to enjoy the latency reduction when delaying the schedule-disable
+ * operation. This matches the 30fps game-render + encode (real world) workload this
+ * knob was tested against.
+ */
+#define SCHED_DISABLE_DELAY_MS 34
+
+/*
+ * A threshold of 75% is a reasonable starting point considering that real world apps
+ * generally don't get anywhere near this.
+ */
+#define NUM_SCHED_DISABLE_GUCIDS_DEFAULT_THRESHOLD(__guc) \
+       (((intel_guc_sched_disable_gucid_threshold_max(__guc)) * 3) / 4)
+
 void intel_guc_submission_init_early(struct intel_guc *guc)
 {
        xa_init_flags(&guc->context_lookup, XA_FLAGS_LOCK_IRQ);
@@ -4235,7 +4464,10 @@ void intel_guc_submission_init_early(struct intel_guc *guc)
        spin_lock_init(&guc->timestamp.lock);
        INIT_DELAYED_WORK(&guc->timestamp.work, guc_timestamp_ping);
 
+       guc->submission_state.sched_disable_delay_ms = SCHED_DISABLE_DELAY_MS;
        guc->submission_state.num_guc_ids = GUC_MAX_CONTEXT_ID;
+       guc->submission_state.sched_disable_gucid_threshold =
+               NUM_SCHED_DISABLE_GUCIDS_DEFAULT_THRESHOLD(guc);
        guc->submission_supported = __guc_submission_supported(guc);
        guc->submission_selected = __guc_submission_selected(guc);
 }
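For readability, the decision that the reworked guc_context_sched_disable() path makes can be summarized as a single predicate. This is an editor's sketch, not a function in the patch; should_delay_sched_disable() is an invented name and the fields it reads come from the hunks above:

static bool should_delay_sched_disable(struct intel_guc *guc,
                                       struct intel_context *ce)
{
        /* parent (multi-LRC) contexts are perma-pinned: disable immediately */
        if (intel_context_is_parent(ce))
                return false;

        /* under guc_id pressure, free the id as soon as possible */
        if (guc->submission_state.guc_ids_in_use >
            guc->submission_state.sched_disable_gucid_threshold)
                return false;

        /* a closed context will not be resubmitted, so there is nothing to save */
        if (intel_context_is_closed(ce))
                return false;

        /* a zero delay (settable via debugfs) disables the optimization entirely */
        return guc->submission_state.sched_disable_delay_ms != 0;
}

When this evaluates true, guc_context_sched_disable() queues sched_disable_delay_work on system_unbound_wq instead of sending the H2G right away, giving userspace roughly one 30fps frame to resubmit on the context.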
index 3bb8838e325a4121ce478c526eb7348d4b4f62d3..fbc8bae14f76a3d7982efcdf996e5f984c931e02 100644 (file)
@@ -10,6 +10,9 @@
 #include "intel_huc.h"
 #include "i915_drv.h"
 
+#include <linux/device/bus.h>
+#include <linux/mei_aux.h>
+
 /**
  * DOC: HuC
  *
  * HuC-specific commands.
  */
 
+/*
+ * MEI-GSC load is an async process. The probing of the exposed aux device
+ * (see intel_gsc.c) usually happens a few seconds after i915 probe, depending
+ * on when the kernel schedules it. Unless something goes terribly wrong, this is
+ * guaranteed to happen during boot, so the big timeout is a safety net
+ * that we never expect to need.
+ * MEI-PXP + HuC load usually takes ~300ms, but if the GSC needs to be resumed
+ * and/or reset, this can take longer. Note that the kernel might schedule
+ * other work between the i915 init/resume and the MEI one, which can add to
+ * the delay.
+ */
+#define GSC_INIT_TIMEOUT_MS 10000
+#define PXP_INIT_TIMEOUT_MS 5000
+
+static int sw_fence_dummy_notify(struct i915_sw_fence *sf,
+                                enum i915_sw_fence_notify state)
+{
+       return NOTIFY_DONE;
+}
+
+static void __delayed_huc_load_complete(struct intel_huc *huc)
+{
+       if (!i915_sw_fence_done(&huc->delayed_load.fence))
+               i915_sw_fence_complete(&huc->delayed_load.fence);
+}
+
+static void delayed_huc_load_complete(struct intel_huc *huc)
+{
+       hrtimer_cancel(&huc->delayed_load.timer);
+       __delayed_huc_load_complete(huc);
+}
+
+static void __gsc_init_error(struct intel_huc *huc)
+{
+       huc->delayed_load.status = INTEL_HUC_DELAYED_LOAD_ERROR;
+       __delayed_huc_load_complete(huc);
+}
+
+static void gsc_init_error(struct intel_huc *huc)
+{
+       hrtimer_cancel(&huc->delayed_load.timer);
+       __gsc_init_error(huc);
+}
+
+static void gsc_init_done(struct intel_huc *huc)
+{
+       hrtimer_cancel(&huc->delayed_load.timer);
+
+       /* MEI-GSC init is done, now we wait for MEI-PXP to bind */
+       huc->delayed_load.status = INTEL_HUC_WAITING_ON_PXP;
+       if (!i915_sw_fence_done(&huc->delayed_load.fence))
+               hrtimer_start(&huc->delayed_load.timer,
+                             ms_to_ktime(PXP_INIT_TIMEOUT_MS),
+                             HRTIMER_MODE_REL);
+}
+
+static enum hrtimer_restart huc_delayed_load_timer_callback(struct hrtimer *hrtimer)
+{
+       struct intel_huc *huc = container_of(hrtimer, struct intel_huc, delayed_load.timer);
+
+       if (!intel_huc_is_authenticated(huc)) {
+               if (huc->delayed_load.status == INTEL_HUC_WAITING_ON_GSC)
+                       drm_notice(&huc_to_gt(huc)->i915->drm,
+                                  "timed out waiting for MEI GSC init to load HuC\n");
+               else if (huc->delayed_load.status == INTEL_HUC_WAITING_ON_PXP)
+                       drm_notice(&huc_to_gt(huc)->i915->drm,
+                                  "timed out waiting for MEI PXP init to load HuC\n");
+               else
+                       MISSING_CASE(huc->delayed_load.status);
+
+               __gsc_init_error(huc);
+       }
+
+       return HRTIMER_NORESTART;
+}
+
+static void huc_delayed_load_start(struct intel_huc *huc)
+{
+       ktime_t delay;
+
+       GEM_BUG_ON(intel_huc_is_authenticated(huc));
+
+       /*
+        * On resume we don't have to wait for MEI-GSC to be re-probed, but we
+        * do need to wait for MEI-PXP to reset & re-bind.
+        */
+       switch (huc->delayed_load.status) {
+       case INTEL_HUC_WAITING_ON_GSC:
+               delay = ms_to_ktime(GSC_INIT_TIMEOUT_MS);
+               break;
+       case INTEL_HUC_WAITING_ON_PXP:
+               delay = ms_to_ktime(PXP_INIT_TIMEOUT_MS);
+               break;
+       default:
+               gsc_init_error(huc);
+               return;
+       }
+
+       /*
+        * This fence is always complete unless we're waiting for the
+        * GSC device to come up to load the HuC. We arm the fence here
+        * and complete it when we confirm that the HuC is loaded from
+        * the PXP bind callback.
+        */
+       GEM_BUG_ON(!i915_sw_fence_done(&huc->delayed_load.fence));
+       i915_sw_fence_fini(&huc->delayed_load.fence);
+       i915_sw_fence_reinit(&huc->delayed_load.fence);
+       i915_sw_fence_await(&huc->delayed_load.fence);
+       i915_sw_fence_commit(&huc->delayed_load.fence);
+
+       hrtimer_start(&huc->delayed_load.timer, delay, HRTIMER_MODE_REL);
+}
+
+static int gsc_notifier(struct notifier_block *nb, unsigned long action, void *data)
+{
+       struct device *dev = data;
+       struct intel_huc *huc = container_of(nb, struct intel_huc, delayed_load.nb);
+       struct intel_gsc_intf *intf = &huc_to_gt(huc)->gsc.intf[0];
+
+       if (!intf->adev || &intf->adev->aux_dev.dev != dev)
+               return 0;
+
+       switch (action) {
+       case BUS_NOTIFY_BOUND_DRIVER: /* mei driver bound to aux device */
+               gsc_init_done(huc);
+               break;
+
+       case BUS_NOTIFY_DRIVER_NOT_BOUND: /* mei driver fails to be bound */
+       case BUS_NOTIFY_UNBIND_DRIVER: /* mei driver about to be unbound */
+               drm_info(&huc_to_gt(huc)->i915->drm,
+                        "mei driver not bound, disabling HuC load\n");
+               gsc_init_error(huc);
+               break;
+       }
+
+       return 0;
+}
+
+void intel_huc_register_gsc_notifier(struct intel_huc *huc, struct bus_type *bus)
+{
+       int ret;
+
+       if (!intel_huc_is_loaded_by_gsc(huc))
+               return;
+
+       huc->delayed_load.nb.notifier_call = gsc_notifier;
+       ret = bus_register_notifier(bus, &huc->delayed_load.nb);
+       if (ret) {
+               drm_err(&huc_to_gt(huc)->i915->drm,
+                       "failed to register GSC notifier\n");
+               huc->delayed_load.nb.notifier_call = NULL;
+               gsc_init_error(huc);
+       }
+}
+
+void intel_huc_unregister_gsc_notifier(struct intel_huc *huc, struct bus_type *bus)
+{
+       if (!huc->delayed_load.nb.notifier_call)
+               return;
+
+       delayed_huc_load_complete(huc);
+
+       bus_unregister_notifier(bus, &huc->delayed_load.nb);
+       huc->delayed_load.nb.notifier_call = NULL;
+}
+
 void intel_huc_init_early(struct intel_huc *huc)
 {
        struct drm_i915_private *i915 = huc_to_gt(huc)->i915;
@@ -57,6 +226,17 @@ void intel_huc_init_early(struct intel_huc *huc)
                huc->status.mask = HUC_FW_VERIFIED;
                huc->status.value = HUC_FW_VERIFIED;
        }
+
+       /*
+        * Initialize fence to be complete as this is expected to be complete
+        * unless there is a delayed HuC reload in progress.
+        */
+       i915_sw_fence_init(&huc->delayed_load.fence,
+                          sw_fence_dummy_notify);
+       i915_sw_fence_commit(&huc->delayed_load.fence);
+
+       hrtimer_init(&huc->delayed_load.timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+       huc->delayed_load.timer.function = huc_delayed_load_timer_callback;
 }
 
 #define HUC_LOAD_MODE_STRING(x) (x ? "GSC" : "legacy")
@@ -113,6 +293,7 @@ int intel_huc_init(struct intel_huc *huc)
        return 0;
 
 out:
+       intel_uc_fw_change_status(&huc->fw, INTEL_UC_FIRMWARE_INIT_FAIL);
        drm_info(&i915->drm, "HuC init failed with %d\n", err);
        return err;
 }
@@ -122,9 +303,50 @@ void intel_huc_fini(struct intel_huc *huc)
        if (!intel_uc_fw_is_loadable(&huc->fw))
                return;
 
+       delayed_huc_load_complete(huc);
+
+       i915_sw_fence_fini(&huc->delayed_load.fence);
        intel_uc_fw_fini(&huc->fw);
 }
 
+void intel_huc_suspend(struct intel_huc *huc)
+{
+       if (!intel_uc_fw_is_loadable(&huc->fw))
+               return;
+
+       /*
+        * In the unlikely case that we're suspending before the GSC has
+        * completed its loading sequence, just stop waiting. We'll restart
+        * on resume.
+        */
+       delayed_huc_load_complete(huc);
+}
+
+int intel_huc_wait_for_auth_complete(struct intel_huc *huc)
+{
+       struct intel_gt *gt = huc_to_gt(huc);
+       int ret;
+
+       ret = __intel_wait_for_register(gt->uncore,
+                                       huc->status.reg,
+                                       huc->status.mask,
+                                       huc->status.value,
+                                       2, 50, NULL);
+
+       /* mark the load process as complete even if the wait failed */
+       delayed_huc_load_complete(huc);
+
+       if (ret) {
+               drm_err(&gt->i915->drm, "HuC: Firmware not verified %d\n", ret);
+               intel_uc_fw_change_status(&huc->fw, INTEL_UC_FIRMWARE_LOAD_FAIL);
+               return ret;
+       }
+
+       intel_uc_fw_change_status(&huc->fw, INTEL_UC_FIRMWARE_RUNNING);
+       drm_info(&gt->i915->drm, "HuC authenticated\n");
+       return 0;
+}
+
 /**
  * intel_huc_auth() - Authenticate HuC uCode
  * @huc: intel_huc structure
@@ -161,27 +383,18 @@ int intel_huc_auth(struct intel_huc *huc)
        }
 
        /* Check authentication status, it should be done by now */
-       ret = __intel_wait_for_register(gt->uncore,
-                                       huc->status.reg,
-                                       huc->status.mask,
-                                       huc->status.value,
-                                       2, 50, NULL);
-       if (ret) {
-               DRM_ERROR("HuC: Firmware not verified %d\n", ret);
+       ret = intel_huc_wait_for_auth_complete(huc);
+       if (ret)
                goto fail;
-       }
 
-       intel_uc_fw_change_status(&huc->fw, INTEL_UC_FIRMWARE_RUNNING);
-       drm_info(&gt->i915->drm, "HuC authenticated\n");
        return 0;
 
 fail:
        i915_probe_error(gt->i915, "HuC: Authentication failed %d\n", ret);
-       intel_uc_fw_change_status(&huc->fw, INTEL_UC_FIRMWARE_LOAD_FAIL);
        return ret;
 }
 
-static bool huc_is_authenticated(struct intel_huc *huc)
+bool intel_huc_is_authenticated(struct intel_huc *huc)
 {
        struct intel_gt *gt = huc_to_gt(huc);
        intel_wakeref_t wakeref;
@@ -200,13 +413,8 @@ static bool huc_is_authenticated(struct intel_huc *huc)
  * This function reads status register to verify if HuC
  * firmware was successfully loaded.
  *
- * Returns:
- *  * -ENODEV if HuC is not present on this platform,
- *  * -EOPNOTSUPP if HuC firmware is disabled,
- *  * -ENOPKG if HuC firmware was not installed,
- *  * -ENOEXEC if HuC firmware is invalid or mismatched,
- *  * 0 if HuC firmware is not running,
- *  * 1 if HuC firmware is authenticated and running.
+ * The return values match what is expected for the I915_PARAM_HUC_STATUS
+ * getparam.
  */
 int intel_huc_check_status(struct intel_huc *huc)
 {
@@ -219,11 +427,21 @@ int intel_huc_check_status(struct intel_huc *huc)
                return -ENOPKG;
        case INTEL_UC_FIRMWARE_ERROR:
                return -ENOEXEC;
+       case INTEL_UC_FIRMWARE_INIT_FAIL:
+               return -ENOMEM;
+       case INTEL_UC_FIRMWARE_LOAD_FAIL:
+               return -EIO;
        default:
                break;
        }
 
-       return huc_is_authenticated(huc);
+       return intel_huc_is_authenticated(huc);
+}
+
+static bool huc_has_delayed_load(struct intel_huc *huc)
+{
+       return intel_huc_is_loaded_by_gsc(huc) &&
+              (huc->delayed_load.status != INTEL_HUC_DELAYED_LOAD_ERROR);
 }
 
 void intel_huc_update_auth_status(struct intel_huc *huc)
@@ -231,9 +449,11 @@ void intel_huc_update_auth_status(struct intel_huc *huc)
        if (!intel_uc_fw_is_loadable(&huc->fw))
                return;
 
-       if (huc_is_authenticated(huc))
+       if (intel_huc_is_authenticated(huc))
                intel_uc_fw_change_status(&huc->fw,
                                          INTEL_UC_FIRMWARE_RUNNING);
+       else if (huc_has_delayed_load(huc))
+               huc_delayed_load_start(huc);
 }
 
 /**
index d7e25b6e879eb7e8249d6fad7898256566d55b31..52db03620c609ab69302e50f2ba64fc3e42709b6 100644 (file)
@@ -7,9 +7,21 @@
 #define _INTEL_HUC_H_
 
 #include "i915_reg_defs.h"
+#include "i915_sw_fence.h"
 #include "intel_uc_fw.h"
 #include "intel_huc_fw.h"
 
+#include <linux/notifier.h>
+#include <linux/hrtimer.h>
+
+struct bus_type;
+
+enum intel_huc_delayed_load_status {
+       INTEL_HUC_WAITING_ON_GSC = 0,
+       INTEL_HUC_WAITING_ON_PXP,
+       INTEL_HUC_DELAYED_LOAD_ERROR,
+};
+
 struct intel_huc {
        /* Generic uC firmware management */
        struct intel_uc_fw fw;
@@ -20,14 +32,27 @@ struct intel_huc {
                u32 mask;
                u32 value;
        } status;
+
+       struct {
+               struct i915_sw_fence fence;
+               struct hrtimer timer;
+               struct notifier_block nb;
+               enum intel_huc_delayed_load_status status;
+       } delayed_load;
 };
 
 void intel_huc_init_early(struct intel_huc *huc);
 int intel_huc_init(struct intel_huc *huc);
 void intel_huc_fini(struct intel_huc *huc);
+void intel_huc_suspend(struct intel_huc *huc);
 int intel_huc_auth(struct intel_huc *huc);
+int intel_huc_wait_for_auth_complete(struct intel_huc *huc);
 int intel_huc_check_status(struct intel_huc *huc);
 void intel_huc_update_auth_status(struct intel_huc *huc);
+bool intel_huc_is_authenticated(struct intel_huc *huc);
+
+void intel_huc_register_gsc_notifier(struct intel_huc *huc, struct bus_type *bus);
+void intel_huc_unregister_gsc_notifier(struct intel_huc *huc, struct bus_type *bus);
 
 static inline int intel_huc_sanitize(struct intel_huc *huc)
 {
@@ -56,6 +81,12 @@ static inline bool intel_huc_is_loaded_by_gsc(const struct intel_huc *huc)
        return huc->fw.loaded_via_gsc;
 }
 
+static inline bool intel_huc_wait_required(struct intel_huc *huc)
+{
+       return intel_huc_is_used(huc) && intel_huc_is_loaded_by_gsc(huc) &&
+              !intel_huc_is_authenticated(huc);
+}
+
 void intel_huc_load_status(struct intel_huc *huc, struct drm_printer *p);
 
 #endif
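A sketch of how the notifier registration declared above might be wired up, assuming it happens wherever the GSC auxiliary device becomes available; the helper name is invented and only the gt->gsc.intf[0].adev layout is taken from gsc_notifier() in the previous hunk:

static void huc_wire_gsc_notifier(struct intel_huc *huc)
{
        struct intel_gsc_intf *intf = &huc_to_gt(huc)->gsc.intf[0];

        /* the aux device sits on the auxiliary bus; listen for bind/unbind there */
        if (intf->adev)
                intel_huc_register_gsc_notifier(huc, intf->adev->aux_dev.dev.bus);
}

The matching intel_huc_unregister_gsc_notifier() call would use the same bus pointer on teardown.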
index 9d6ab1e016395f2feeb91f98787b2f1258d0711c..4f246416db17d8fb3a691ec469d56fb4973f44e0 100644 (file)
@@ -3,9 +3,43 @@
  * Copyright © 2014-2019 Intel Corporation
  */
 
+#include "gt/intel_gsc.h"
 #include "gt/intel_gt.h"
+#include "intel_huc.h"
 #include "intel_huc_fw.h"
 #include "i915_drv.h"
+#include "pxp/intel_pxp_huc.h"
+
+int intel_huc_fw_load_and_auth_via_gsc(struct intel_huc *huc)
+{
+       int ret;
+
+       if (!intel_huc_is_loaded_by_gsc(huc))
+               return -ENODEV;
+
+       if (!intel_uc_fw_is_loadable(&huc->fw))
+               return -ENOEXEC;
+
+       /*
+        * If we abort a suspend, HuC might still be loaded when the mei
+        * component gets re-bound and this function called again. If so, just
+        * mark the HuC as loaded.
+        */
+       if (intel_huc_is_authenticated(huc)) {
+               intel_uc_fw_change_status(&huc->fw, INTEL_UC_FIRMWARE_RUNNING);
+               return 0;
+       }
+
+       GEM_WARN_ON(intel_uc_fw_is_loaded(&huc->fw));
+
+       ret = intel_pxp_huc_load_and_auth(&huc_to_gt(huc)->pxp);
+       if (ret)
+               return ret;
+
+       intel_uc_fw_change_status(&huc->fw, INTEL_UC_FIRMWARE_TRANSFERRED);
+
+       return intel_huc_wait_for_auth_complete(huc);
+}
 
 /**
  * intel_huc_fw_upload() - load HuC uCode to device via DMA transfer
index 12f264ee3e0b366773c631b842aa569107f5cfbe..db42e238b45f21d34307ed156a9eb960023118cf 100644 (file)
@@ -8,6 +8,7 @@
 
 struct intel_huc;
 
+int intel_huc_fw_load_and_auth_via_gsc(struct intel_huc *huc);
 int intel_huc_fw_upload(struct intel_huc *huc);
 
 #endif
index b91ad4aede1f79b3b43869537fbd18f3de6d6716..de2843dc1307e5b1a6fa721610e03df0b83b5c32 100644 (file)
@@ -93,7 +93,8 @@ void intel_uc_fw_change_status(struct intel_uc_fw *uc_fw,
        fw_def(BROXTON,      0, guc_mmp(bxt,  70, 1, 1)) \
        fw_def(SKYLAKE,      0, guc_mmp(skl,  70, 1, 1))
 
-#define INTEL_HUC_FIRMWARE_DEFS(fw_def, huc_raw, huc_mmp) \
+#define INTEL_HUC_FIRMWARE_DEFS(fw_def, huc_raw, huc_mmp, huc_gsc) \
+       fw_def(DG2,          0, huc_gsc(dg2)) \
        fw_def(ALDERLAKE_P,  0, huc_raw(tgl)) \
        fw_def(ALDERLAKE_P,  0, huc_mmp(tgl,  7, 9, 3)) \
        fw_def(ALDERLAKE_S,  0, huc_raw(tgl)) \
@@ -141,6 +142,9 @@ void intel_uc_fw_change_status(struct intel_uc_fw *uc_fw,
 #define MAKE_HUC_FW_PATH_BLANK(prefix_) \
        __MAKE_UC_FW_PATH_BLANK(prefix_, "_huc")
 
+#define MAKE_HUC_FW_PATH_GSC(prefix_) \
+       __MAKE_UC_FW_PATH_BLANK(prefix_, "_huc_gsc")
+
 #define MAKE_HUC_FW_PATH_MMP(prefix_, major_, minor_, patch_) \
        __MAKE_UC_FW_PATH_MMP(prefix_, "_huc_", major_, minor_, patch_)
 
@@ -153,7 +157,7 @@ void intel_uc_fw_change_status(struct intel_uc_fw *uc_fw,
        MODULE_FIRMWARE(uc_);
 
 INTEL_GUC_FIRMWARE_DEFS(INTEL_UC_MODULE_FW, MAKE_GUC_FW_PATH_MAJOR, MAKE_GUC_FW_PATH_MMP)
-INTEL_HUC_FIRMWARE_DEFS(INTEL_UC_MODULE_FW, MAKE_HUC_FW_PATH_BLANK, MAKE_HUC_FW_PATH_MMP)
+INTEL_HUC_FIRMWARE_DEFS(INTEL_UC_MODULE_FW, MAKE_HUC_FW_PATH_BLANK, MAKE_HUC_FW_PATH_MMP, MAKE_HUC_FW_PATH_GSC)
 
 /*
  * The next expansion of the table macros (in __uc_fw_auto_select below) provides
@@ -168,6 +172,7 @@ struct __packed uc_fw_blob {
        u8 major;
        u8 minor;
        u8 patch;
+       bool loaded_via_gsc;
 };
 
 #define UC_FW_BLOB_BASE(major_, minor_, patch_, path_) \
@@ -176,16 +181,16 @@ struct __packed uc_fw_blob {
        .patch = patch_, \
        .path = path_,
 
-#define UC_FW_BLOB_NEW(major_, minor_, patch_, path_) \
+#define UC_FW_BLOB_NEW(major_, minor_, patch_, gsc_, path_) \
        { UC_FW_BLOB_BASE(major_, minor_, patch_, path_) \
-         .legacy = false }
+         .legacy = false, .loaded_via_gsc = gsc_ }
 
 #define UC_FW_BLOB_OLD(major_, minor_, patch_, path_) \
        { UC_FW_BLOB_BASE(major_, minor_, patch_, path_) \
          .legacy = true }
 
 #define GUC_FW_BLOB(prefix_, major_, minor_) \
-       UC_FW_BLOB_NEW(major_, minor_, 0, \
+       UC_FW_BLOB_NEW(major_, minor_, 0, false, \
                       MAKE_GUC_FW_PATH_MAJOR(prefix_, major_, minor_))
 
 #define GUC_FW_BLOB_MMP(prefix_, major_, minor_, patch_) \
@@ -193,12 +198,15 @@ struct __packed uc_fw_blob {
                       MAKE_GUC_FW_PATH_MMP(prefix_, major_, minor_, patch_))
 
 #define HUC_FW_BLOB(prefix_) \
-       UC_FW_BLOB_NEW(0, 0, 0, MAKE_HUC_FW_PATH_BLANK(prefix_))
+       UC_FW_BLOB_NEW(0, 0, 0, false, MAKE_HUC_FW_PATH_BLANK(prefix_))
 
 #define HUC_FW_BLOB_MMP(prefix_, major_, minor_, patch_) \
        UC_FW_BLOB_OLD(major_, minor_, patch_, \
                       MAKE_HUC_FW_PATH_MMP(prefix_, major_, minor_, patch_))
 
+#define HUC_FW_BLOB_GSC(prefix_) \
+       UC_FW_BLOB_NEW(0, 0, 0, true, MAKE_HUC_FW_PATH_GSC(prefix_))
+
 struct __packed uc_fw_platform_requirement {
        enum intel_platform p;
        u8 rev; /* first platform rev using this FW */
@@ -224,7 +232,7 @@ __uc_fw_auto_select(struct drm_i915_private *i915, struct intel_uc_fw *uc_fw)
                INTEL_GUC_FIRMWARE_DEFS(MAKE_FW_LIST, GUC_FW_BLOB, GUC_FW_BLOB_MMP)
        };
        static const struct uc_fw_platform_requirement blobs_huc[] = {
-               INTEL_HUC_FIRMWARE_DEFS(MAKE_FW_LIST, HUC_FW_BLOB, HUC_FW_BLOB_MMP)
+               INTEL_HUC_FIRMWARE_DEFS(MAKE_FW_LIST, HUC_FW_BLOB, HUC_FW_BLOB_MMP, HUC_FW_BLOB_GSC)
        };
        static const struct fw_blobs_by_type blobs_all[INTEL_UC_FW_NUM_TYPES] = {
                [INTEL_UC_FW_TYPE_GUC] = { blobs_guc, ARRAY_SIZE(blobs_guc) },
@@ -272,6 +280,7 @@ __uc_fw_auto_select(struct drm_i915_private *i915, struct intel_uc_fw *uc_fw)
                uc_fw->file_wanted.path = blob->path;
                uc_fw->file_wanted.major_ver = blob->major;
                uc_fw->file_wanted.minor_ver = blob->minor;
+               uc_fw->loaded_via_gsc = blob->loaded_via_gsc;
                found = true;
                break;
        }
@@ -904,7 +913,6 @@ int intel_uc_fw_init(struct intel_uc_fw *uc_fw)
 out_unpin:
        i915_gem_object_unpin_pages(uc_fw->obj);
 out:
-       intel_uc_fw_change_status(uc_fw, INTEL_UC_FIRMWARE_INIT_FAIL);
        return err;
 }
 
index eef3bba8a41bceeb7a425860b973ecb71e8f88b2..357c5b65e097b80d01caae75d4812ed0cdb8151f 100644 (file)
@@ -354,9 +354,9 @@ void intel_vgpu_init_cfg_space(struct intel_vgpu *vgpu,
        memset(vgpu_cfg_space(vgpu) + INTEL_GVT_PCI_OPREGION, 0, 4);
 
        vgpu->cfg_space.bar[INTEL_GVT_PCI_BAR_GTTMMIO].size =
-               pci_resource_len(pdev, GTTMMADR_BAR);
+               pci_resource_len(pdev, GEN4_GTTMMADR_BAR);
        vgpu->cfg_space.bar[INTEL_GVT_PCI_BAR_APERTURE].size =
-               pci_resource_len(pdev, GTT_APERTURE_BAR);
+               pci_resource_len(pdev, GEN4_GMADR_BAR);
 
        memset(vgpu_cfg_space(vgpu) + PCI_ROM_ADDRESS, 0, 4);
 
index daac2050d77d0440bca30c83ca2d6399f65fc89c..1cb388484bf011c7ce307c1cab352647bd01f3a7 100644 (file)
@@ -734,7 +734,7 @@ static i915_reg_t force_nonpriv_white_list[] = {
        _MMIO(0x770c),
        _MMIO(0x83a8),
        _MMIO(0xb110),
-       GEN8_L3SQCREG4,//_MMIO(0xb118)
+       _MMIO(0xb118),
        _MMIO(0xe100),
        _MMIO(0xe18c),
        _MMIO(0xe48c),
@@ -2257,7 +2257,7 @@ static int init_generic_mmio_info(struct intel_gvt *gvt)
        MMIO_DFH(_MMIO(0x2438), D_ALL, F_CMD_ACCESS, NULL, NULL);
        MMIO_DFH(_MMIO(0x243c), D_ALL, F_CMD_ACCESS, NULL, NULL);
        MMIO_DFH(_MMIO(0x7018), D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL);
-       MMIO_DFH(HALF_SLICE_CHICKEN3, D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL);
+       MMIO_DFH(HSW_HALF_SLICE_CHICKEN3, D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL);
        MMIO_DFH(GEN7_HALF_SLICE_CHICKEN1, D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL);
 
        /* display */
index 1c6e941c96666c76b3692e8d378113f273b3f0e1..200c1162daa34b15dbc74fc35dc09076e5a9e439 100644 (file)
@@ -106,15 +106,15 @@ static struct engine_mmio gen9_engine_mmio_list[] __cacheline_aligned = {
        {RCS0, GEN8_CS_CHICKEN1, 0xffff, true}, /* 0x2580 */
        {RCS0, COMMON_SLICE_CHICKEN2, 0xffff, true}, /* 0x7014 */
        {RCS0, GEN9_CS_DEBUG_MODE1, 0xffff, false}, /* 0x20ec */
-       {RCS0, GEN8_L3SQCREG4, 0, false}, /* 0xb118 */
-       {RCS0, GEN9_SCRATCH1, 0, false}, /* 0xb11c */
+       {RCS0, _MMIO(0xb118), 0, false}, /* GEN8_L3SQCREG4 */
+       {RCS0, _MMIO(0xb11c), 0, false}, /* GEN9_SCRATCH1 */
        {RCS0, GEN9_SCRATCH_LNCF1, 0, false}, /* 0xb008 */
        {RCS0, GEN7_HALF_SLICE_CHICKEN1, 0xffff, true}, /* 0xe100 */
-       {RCS0, HALF_SLICE_CHICKEN2, 0xffff, true}, /* 0xe180 */
-       {RCS0, HALF_SLICE_CHICKEN3, 0xffff, true}, /* 0xe184 */
-       {RCS0, GEN9_HALF_SLICE_CHICKEN5, 0xffff, true}, /* 0xe188 */
-       {RCS0, GEN9_HALF_SLICE_CHICKEN7, 0xffff, true}, /* 0xe194 */
-       {RCS0, GEN8_ROW_CHICKEN, 0xffff, true}, /* 0xe4f0 */
+       {RCS0, _MMIO(0xe180), 0xffff, true}, /* HALF_SLICE_CHICKEN2 */
+       {RCS0, _MMIO(0xe184), 0xffff, true}, /* GEN8_HALF_SLICE_CHICKEN3 */
+       {RCS0, _MMIO(0xe188), 0xffff, true}, /* GEN9_HALF_SLICE_CHICKEN5 */
+       {RCS0, _MMIO(0xe194), 0xffff, true}, /* GEN9_HALF_SLICE_CHICKEN7 */
+       {RCS0, _MMIO(0xe4f0), 0xffff, true}, /* GEN8_ROW_CHICKEN */
        {RCS0, TRVATTL3PTRDW(0), 0, true}, /* 0x4de0 */
        {RCS0, TRVATTL3PTRDW(1), 0, true}, /* 0x4de4 */
        {RCS0, TRNULLDETCT, 0, true}, /* 0x4de8 */
index 298ed36f078a8fee225f74d268e532af585ab3ae..c3d43f9b1e45dbaefcb341a27436889ea44464bf 100644 (file)
@@ -81,6 +81,7 @@
 #include "i915_drm_client.h"
 #include "i915_drv.h"
 #include "i915_getparam.h"
+#include "i915_hwmon.h"
 #include "i915_ioc32.h"
 #include "i915_ioctl.h"
 #include "i915_irq.h"
@@ -764,6 +765,8 @@ static void i915_driver_register(struct drm_i915_private *dev_priv)
        for_each_gt(gt, dev_priv, i)
                intel_gt_driver_register(gt);
 
+       i915_hwmon_register(dev_priv);
+
        intel_display_driver_register(dev_priv);
 
        intel_power_domains_enable(dev_priv);
@@ -796,6 +799,8 @@ static void i915_driver_unregister(struct drm_i915_private *dev_priv)
        for_each_gt(gt, dev_priv, i)
                intel_gt_driver_unregister(gt);
 
+       i915_hwmon_unregister(dev_priv);
+
        i915_perf_unregister(dev_priv);
        i915_pmu_unregister(dev_priv);
 
@@ -1656,7 +1661,8 @@ static int intel_runtime_suspend(struct device *kdev)
 
                intel_runtime_pm_enable_interrupts(dev_priv);
 
-               intel_gt_runtime_resume(to_gt(dev_priv));
+               for_each_gt(gt, dev_priv, i)
+                       intel_gt_runtime_resume(gt);
 
                enable_rpm_wakeref_asserts(rpm);
 
index 349ff7d65debd7a120b73f4f0f29109a636d0fdf..05b3300cc4edfee3b4b790ea5c120a794917fb97 100644 (file)
@@ -40,7 +40,6 @@
 #include "display/intel_display_core.h"
 
 #include "gem/i915_gem_context_types.h"
-#include "gem/i915_gem_lmem.h"
 #include "gem/i915_gem_shrinker.h"
 #include "gem/i915_gem_stolen.h"
 
@@ -350,6 +349,8 @@ struct drm_i915_private {
 
        struct i915_perf perf;
 
+       struct i915_hwmon *hwmon;
+
        /* Abstract the submission mechanism (legacy ringbuffer or execlists) away */
        struct intel_gt gt0;
 
@@ -898,19 +899,17 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915,
 #define HAS_RUNTIME_PM(dev_priv) (INTEL_INFO(dev_priv)->has_runtime_pm)
 #define HAS_64BIT_RELOC(dev_priv) (INTEL_INFO(dev_priv)->has_64bit_reloc)
 
+#define HAS_OA_BPC_REPORTING(dev_priv) \
+       (INTEL_INFO(dev_priv)->has_oa_bpc_reporting)
+#define HAS_OA_SLICE_CONTRIB_LIMITS(dev_priv) \
+       (INTEL_INFO(dev_priv)->has_oa_slice_contrib_limits)
+
 /*
  * Set this flag, when platform requires 64K GTT page sizes or larger for
  * device local memory access.
  */
 #define HAS_64K_PAGES(dev_priv) (INTEL_INFO(dev_priv)->has_64k_pages)
 
-/*
- * Set this flag when platform doesn't allow both 64k pages and 4k pages in
- * the same PT. this flag means we need to support compact PT layout for the
- * ppGTT when using the 64K GTT pages.
- */
-#define NEEDS_COMPACT_PT(dev_priv) (INTEL_INFO(dev_priv)->needs_compact_pt)
-
 #define HAS_IPC(dev_priv)               (INTEL_INFO(dev_priv)->display.has_ipc)
 
 #define HAS_REGION(i915, i) (RUNTIME_INFO(i915)->memory_regions & (i))
@@ -976,6 +975,9 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915,
 
 #define HAS_ONE_EU_PER_FUSE_BIT(i915)  (INTEL_INFO(i915)->has_one_eu_per_fuse_bit)
 
+#define HAS_LMEMBAR_SMEM_STOLEN(i915) (!HAS_LMEM(i915) && \
+                                      GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70))
+
 /* intel_device_info.c */
 static inline struct intel_device_info *
 mkwrite_device_info(struct drm_i915_private *dev_priv)
@@ -983,16 +985,4 @@ mkwrite_device_info(struct drm_i915_private *dev_priv)
        return (struct intel_device_info *)INTEL_INFO(dev_priv);
 }
 
-static inline enum i915_map_type
-i915_coherent_map_type(struct drm_i915_private *i915,
-                      struct drm_i915_gem_object *obj, bool always_coherent)
-{
-       if (i915_gem_object_is_lmem(obj))
-               return I915_MAP_WC;
-       if (HAS_LLC(i915) || always_coherent)
-               return I915_MAP_WB;
-       else
-               return I915_MAP_WC;
-}
-
 #endif
index 2bdddb61ebd7ae6215eaa26706a2df3258d42171..299f94a9fb87bdb0b7505f2c1f338628ff5a3f72 100644 (file)
@@ -843,7 +843,7 @@ void i915_gem_runtime_suspend(struct drm_i915_private *i915)
                __i915_gem_object_release_mmap_gtt(obj);
 
        list_for_each_entry_safe(obj, on,
-                                &to_gt(i915)->lmem_userfault_list, userfault_link)
+                                &i915->runtime_pm.lmem_userfault_list, userfault_link)
                i915_gem_object_runtime_pm_release_mmap_offset(obj);
 
        /*
@@ -1128,6 +1128,8 @@ void i915_gem_drain_workqueue(struct drm_i915_private *i915)
 
 int i915_gem_init(struct drm_i915_private *dev_priv)
 {
+       struct intel_gt *gt;
+       unsigned int i;
        int ret;
 
        /* We need to fallback to 4K pages if host doesn't support huge gtt. */
@@ -1158,9 +1160,11 @@ int i915_gem_init(struct drm_i915_private *dev_priv)
         */
        intel_init_clock_gating(dev_priv);
 
-       ret = intel_gt_init(to_gt(dev_priv));
-       if (ret)
-               goto err_unlock;
+       for_each_gt(gt, dev_priv, i) {
+               ret = intel_gt_init(gt);
+               if (ret)
+                       goto err_unlock;
+       }
 
        return 0;
 
@@ -1173,8 +1177,13 @@ int i915_gem_init(struct drm_i915_private *dev_priv)
 err_unlock:
        i915_gem_drain_workqueue(dev_priv);
 
-       if (ret != -EIO)
-               intel_uc_cleanup_firmwares(&to_gt(dev_priv)->uc);
+       if (ret != -EIO) {
+               for_each_gt(gt, dev_priv, i) {
+                       intel_gt_driver_remove(gt);
+                       intel_gt_driver_release(gt);
+                       intel_uc_cleanup_firmwares(&gt->uc);
+               }
+       }
 
        if (ret == -EIO) {
                /*
@@ -1182,10 +1191,12 @@ err_unlock:
                 * as wedged. But we only want to do this when the GPU is angry,
                 * for all other failure, such as an allocation failure, bail.
                 */
-               if (!intel_gt_is_wedged(to_gt(dev_priv))) {
-                       i915_probe_error(dev_priv,
-                                        "Failed to initialize GPU, declaring it wedged!\n");
-                       intel_gt_set_wedged(to_gt(dev_priv));
+               for_each_gt(gt, dev_priv, i) {
+                       if (!intel_gt_is_wedged(gt)) {
+                               i915_probe_error(dev_priv,
+                                                "Failed to initialize GPU, declaring it wedged!\n");
+                               intel_gt_set_wedged(gt);
+                       }
                }
 
                /* Minimal basic recovery for KMS */
@@ -1213,23 +1224,27 @@ void i915_gem_driver_unregister(struct drm_i915_private *i915)
 
 void i915_gem_driver_remove(struct drm_i915_private *dev_priv)
 {
-       intel_wakeref_auto_fini(&to_gt(dev_priv)->userfault_wakeref);
+       struct intel_gt *gt;
+       unsigned int i;
 
        i915_gem_suspend_late(dev_priv);
-       intel_gt_driver_remove(to_gt(dev_priv));
+       for_each_gt(gt, dev_priv, i)
+               intel_gt_driver_remove(gt);
        dev_priv->uabi_engines = RB_ROOT;
 
        /* Flush any outstanding unpin_work. */
        i915_gem_drain_workqueue(dev_priv);
-
-       i915_gem_drain_freed_objects(dev_priv);
 }
 
 void i915_gem_driver_release(struct drm_i915_private *dev_priv)
 {
-       intel_gt_driver_release(to_gt(dev_priv));
+       struct intel_gt *gt;
+       unsigned int i;
 
-       intel_uc_cleanup_firmwares(&to_gt(dev_priv)->uc);
+       for_each_gt(gt, dev_priv, i) {
+               intel_gt_driver_release(gt);
+               intel_uc_cleanup_firmwares(&gt->uc);
+       }
 
        /* Flush any outstanding work, including i915_gem_context.release_work. */
        i915_gem_drain_workqueue(dev_priv);
@@ -1259,7 +1274,7 @@ void i915_gem_init_early(struct drm_i915_private *dev_priv)
 
 void i915_gem_cleanup_early(struct drm_i915_private *dev_priv)
 {
-       i915_gem_drain_freed_objects(dev_priv);
+       i915_gem_drain_workqueue(dev_priv);
        GEM_BUG_ON(!llist_empty(&dev_priv->mm.free_list));
        GEM_BUG_ON(atomic_read(&dev_priv->mm.free_count));
        drm_WARN_ON(&dev_priv->drm, dev_priv->mm.shrink_count);
index 342c8ca6414ec6476a08ff086a44ca14eab754de..3047e80e1163d6bf658c6f32b344e6c63a7626cf 100644 (file)
@@ -175,6 +175,9 @@ int i915_getparam_ioctl(struct drm_device *dev, void *data,
        case I915_PARAM_PERF_REVISION:
                value = i915_perf_ioctl_version();
                break;
+       case I915_PARAM_OA_TIMESTAMP_FREQUENCY:
+               value = i915_perf_oa_timestamp_frequency(i915);
+               break;
        default:
                DRM_DEBUG("Unknown parameter %d\n", param->param);
                return -EINVAL;
index 9ea2fe34e7d307f6a40c979ea5a9860a02f9dbdc..f2d53edcd2ee072561548046d8224209718e74f0 100644 (file)
@@ -1221,7 +1221,10 @@ static void engine_record_registers(struct intel_engine_coredump *ee)
        if (GRAPHICS_VER(i915) >= 6) {
                ee->rc_psmi = ENGINE_READ(engine, RING_PSMI_CTL);
 
-               if (GRAPHICS_VER(i915) >= 12)
+               if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50))
+                       ee->fault_reg = intel_gt_mcr_read_any(engine->gt,
+                                                             XEHP_RING_FAULT_REG);
+               else if (GRAPHICS_VER(i915) >= 12)
                        ee->fault_reg = intel_uncore_read(engine->uncore,
                                                          GEN12_RING_FAULT_REG);
                else if (GRAPHICS_VER(i915) >= 8)
@@ -1820,7 +1823,12 @@ static void gt_record_global_regs(struct intel_gt_coredump *gt)
        if (GRAPHICS_VER(i915) == 7)
                gt->err_int = intel_uncore_read(uncore, GEN7_ERR_INT);
 
-       if (GRAPHICS_VER(i915) >= 12) {
+       if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50)) {
+               gt->fault_data0 = intel_gt_mcr_read_any((struct intel_gt *)gt->_gt,
+                                                       XEHP_FAULT_TLB_DATA0);
+               gt->fault_data1 = intel_gt_mcr_read_any((struct intel_gt *)gt->_gt,
+                                                       XEHP_FAULT_TLB_DATA1);
+       } else if (GRAPHICS_VER(i915) >= 12) {
                gt->fault_data0 = intel_uncore_read(uncore,
                                                    GEN12_FAULT_TLB_DATA0);
                gt->fault_data1 = intel_uncore_read(uncore,
diff --git a/drivers/gpu/drm/i915/i915_hwmon.c b/drivers/gpu/drm/i915/i915_hwmon.c
new file mode 100644 (file)
index 0000000..c588a17
--- /dev/null
@@ -0,0 +1,732 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include <linux/hwmon.h>
+#include <linux/hwmon-sysfs.h>
+#include <linux/types.h>
+
+#include "i915_drv.h"
+#include "i915_hwmon.h"
+#include "i915_reg.h"
+#include "intel_mchbar_regs.h"
+#include "intel_pcode.h"
+#include "gt/intel_gt.h"
+#include "gt/intel_gt_regs.h"
+
+/*
+ * SF_* - scale factors for particular quantities according to hwmon spec.
+ * - voltage  - millivolts
+ * - power  - microwatts
+ * - curr   - milliamperes
+ * - energy - microjoules
+ * - time   - milliseconds
+ */
+#define SF_VOLTAGE     1000
+#define SF_POWER       1000000
+#define SF_CURR                1000
+#define SF_ENERGY      1000000
+#define SF_TIME                1000
+
+struct hwm_reg {
+       i915_reg_t gt_perf_status;
+       i915_reg_t pkg_power_sku_unit;
+       i915_reg_t pkg_power_sku;
+       i915_reg_t pkg_rapl_limit;
+       i915_reg_t energy_status_all;
+       i915_reg_t energy_status_tile;
+};
+
+struct hwm_energy_info {
+       u32 reg_val_prev;
+       long accum_energy;                      /* Accumulated energy for energy1_input */
+};
+
+struct hwm_drvdata {
+       struct i915_hwmon *hwmon;
+       struct intel_uncore *uncore;
+       struct device *hwmon_dev;
+       struct hwm_energy_info ei;              /*  Energy info for energy1_input */
+       char name[12];
+       int gt_n;
+};
+
+struct i915_hwmon {
+       struct hwm_drvdata ddat;
+       struct hwm_drvdata ddat_gt[I915_MAX_GT];
+       struct mutex hwmon_lock;                /* counter overflow logic and rmw */
+       struct hwm_reg rg;
+       int scl_shift_power;
+       int scl_shift_energy;
+       int scl_shift_time;
+};
+
+static void
+hwm_locked_with_pm_intel_uncore_rmw(struct hwm_drvdata *ddat,
+                                   i915_reg_t reg, u32 clear, u32 set)
+{
+       struct i915_hwmon *hwmon = ddat->hwmon;
+       struct intel_uncore *uncore = ddat->uncore;
+       intel_wakeref_t wakeref;
+
+       mutex_lock(&hwmon->hwmon_lock);
+
+       with_intel_runtime_pm(uncore->rpm, wakeref)
+               intel_uncore_rmw(uncore, reg, clear, set);
+
+       mutex_unlock(&hwmon->hwmon_lock);
+}
+
+/*
+ * This function's return type of u64 allows for the case where the scaling
+ * of the field taken from the 32-bit register value might cause a result to
+ * exceed 32 bits.
+ */
+static u64
+hwm_field_read_and_scale(struct hwm_drvdata *ddat, i915_reg_t rgadr,
+                        u32 field_msk, int nshift, u32 scale_factor)
+{
+       struct intel_uncore *uncore = ddat->uncore;
+       intel_wakeref_t wakeref;
+       u32 reg_value;
+
+       with_intel_runtime_pm(uncore->rpm, wakeref)
+               reg_value = intel_uncore_read(uncore, rgadr);
+
+       reg_value = REG_FIELD_GET(field_msk, reg_value);
+
+       return mul_u64_u32_shr(reg_value, scale_factor, nshift);
+}
+
+static void
+hwm_field_scale_and_write(struct hwm_drvdata *ddat, i915_reg_t rgadr,
+                         int nshift, unsigned int scale_factor, long lval)
+{
+       u32 nval;
+
+       /* Computation in 64-bits to avoid overflow. Round to nearest. */
+       nval = DIV_ROUND_CLOSEST_ULL((u64)lval << nshift, scale_factor);
+
+       hwm_locked_with_pm_intel_uncore_rmw(ddat, rgadr,
+                                           PKG_PWR_LIM_1,
+                                           REG_FIELD_PREP(PKG_PWR_LIM_1, nval));
+}
+
+/*
+ * hwm_energy - Obtain energy value
+ *
+ * The underlying energy hardware register is 32 bits wide and is subject to
+ * overflow. How long before overflow? For example, with a scaling bit shift
+ * of 14 bits (see register *PACKAGE_POWER_SKU_UNIT) and a power draw of
+ * 1000 watts, the 32-bit counter will overflow in approximately
+ * 4.36 minutes.
+ *
+ * Examples:
+ *    1 watt:  (2^32 >> 14) /    1 W / (60 * 60 * 24) secs/day -> 3 days
+ * 1000 watts: (2^32 >> 14) / 1000 W / 60             secs/min -> 4.36 minutes
+ *
+ * The function significantly increases overflow duration (from 4.36
+ * minutes) by accumulating the energy register into a 'long' as allowed by
+ * the hwmon API. Using x86_64 128 bit arithmetic (see mul_u64_u32_shr()),
+ * a 'long' of 63 bits, SF_ENERGY of 1e6 (~20 bits) and
+ * hwmon->scl_shift_energy of 14 bits we have 57 (63 - 20 + 14) bits before
+ * energy1_input overflows. This at 1000 W is an overflow duration of 278 years.
+ */
+static void
+hwm_energy(struct hwm_drvdata *ddat, long *energy)
+{
+       struct intel_uncore *uncore = ddat->uncore;
+       struct i915_hwmon *hwmon = ddat->hwmon;
+       struct hwm_energy_info *ei = &ddat->ei;
+       intel_wakeref_t wakeref;
+       i915_reg_t rgaddr;
+       u32 reg_val;
+
+       if (ddat->gt_n >= 0)
+               rgaddr = hwmon->rg.energy_status_tile;
+       else
+               rgaddr = hwmon->rg.energy_status_all;
+
+       mutex_lock(&hwmon->hwmon_lock);
+
+       with_intel_runtime_pm(uncore->rpm, wakeref)
+               reg_val = intel_uncore_read(uncore, rgaddr);
+
+       if (reg_val >= ei->reg_val_prev)
+               ei->accum_energy += reg_val - ei->reg_val_prev;
+       else
+               ei->accum_energy += UINT_MAX - ei->reg_val_prev + reg_val;
+       ei->reg_val_prev = reg_val;
+
+       *energy = mul_u64_u32_shr(ei->accum_energy, SF_ENERGY,
+                                 hwmon->scl_shift_energy);
+       mutex_unlock(&hwmon->hwmon_lock);
+}
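
A minimal standalone sketch of the overflow arithmetic described in the comment above, assuming the 14-bit scale shift used in the example (the shift and the wattages are illustrative, not read from hardware):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	const unsigned int shift = 14;                 /* 1 unit = 1/2^14 J */
	const double span_joules = (double)UINT32_MAX / (1u << shift);

	printf("32-bit counter spans ~%.0f J\n", span_joules);
	printf("   1 W -> ~%.1f days to overflow\n", span_joules / 86400.0);
	printf("1000 W -> ~%.1f minutes to overflow\n",
	       span_joules / 1000.0 / 60.0);
	return 0;
}
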
+
+static ssize_t
+hwm_power1_max_interval_show(struct device *dev, struct device_attribute *attr,
+                            char *buf)
+{
+       struct hwm_drvdata *ddat = dev_get_drvdata(dev);
+       struct i915_hwmon *hwmon = ddat->hwmon;
+       intel_wakeref_t wakeref;
+       u32 r, x, y, x_w = 2; /* 2 bits */
+       u64 tau4, out;
+
+       with_intel_runtime_pm(ddat->uncore->rpm, wakeref)
+               r = intel_uncore_read(ddat->uncore, hwmon->rg.pkg_rapl_limit);
+
+       x = REG_FIELD_GET(PKG_PWR_LIM_1_TIME_X, r);
+       y = REG_FIELD_GET(PKG_PWR_LIM_1_TIME_Y, r);
+       /*
+        * tau = 1.x * power(2,y), x = bits(23:22), y = bits(21:17)
+        *     = (4 | x) << (y - 2)
+        * where shifting by (y - 2) compensates for (4 | x) being 1.x scaled
+        * by 2^2. However, because y can be < 2, we instead compute
+        *     tau4 = (4 | x) << y
+        * and fold the extra factor of 4 into the final right shift below
+        */
+       tau4 = ((1 << x_w) | x) << y;
+       /* val in hwmon interface units (millisec) */
+       out = mul_u64_u32_shr(tau4, SF_TIME, hwmon->scl_shift_time + x_w);
+
+       return sysfs_emit(buf, "%llu\n", out);
+}
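
A standalone sketch of the 1.x * 2^y decode performed above; tau_ms() is illustrative only, and the 0xa time shift is the default quoted in the store path below, assumed here rather than read from hardware:

#include <stdint.h>
#include <stdio.h>

static uint64_t tau_ms(uint32_t x, uint32_t y, unsigned int scl_shift_time)
{
	const unsigned int x_w = 2;                /* x occupies 2 bits */
	uint64_t tau4 = ((1ull << x_w) | x) << y;  /* 4 * 1.x * 2^y */

	/* Divide by 2^(shift + 2); the extra 2 undoes the factor of 4. */
	return (tau4 * 1000) >> (scl_shift_time + x_w);
}

int main(void)
{
	/* x = 0, y = 0x12 is the max window used in the store() path. */
	printf("max tau = %llu ms\n",
	       (unsigned long long)tau_ms(0, 0x12, 0xa));   /* 256000 ms */
	return 0;
}
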
+
+static ssize_t
+hwm_power1_max_interval_store(struct device *dev,
+                             struct device_attribute *attr,
+                             const char *buf, size_t count)
+{
+       struct hwm_drvdata *ddat = dev_get_drvdata(dev);
+       struct i915_hwmon *hwmon = ddat->hwmon;
+       u32 x, y, rxy, x_w = 2; /* 2 bits */
+       u64 tau4, r, max_win;
+       unsigned long val;
+       int ret;
+
+       ret = kstrtoul(buf, 0, &val);
+       if (ret)
+               return ret;
+
+       /*
+        * Max HW supported tau in '1.x * power(2,y)' format, x = 0, y = 0x12
+        * The hwmon->scl_shift_time default of 0xa results in a max tau of 256 seconds
+        */
+#define PKG_MAX_WIN_DEFAULT 0x12ull
+
+       /*
+        * val must be < max in hwmon interface units. The steps below are
+        * explained in hwm_power1_max_interval_show()
+        */
+       r = FIELD_PREP(PKG_MAX_WIN, PKG_MAX_WIN_DEFAULT);
+       x = REG_FIELD_GET(PKG_MAX_WIN_X, r);
+       y = REG_FIELD_GET(PKG_MAX_WIN_Y, r);
+       tau4 = ((1 << x_w) | x) << y;
+       max_win = mul_u64_u32_shr(tau4, SF_TIME, hwmon->scl_shift_time + x_w);
+
+       if (val > max_win)
+               return -EINVAL;
+
+       /* val in hw units */
+       val = DIV_ROUND_CLOSEST_ULL((u64)val << hwmon->scl_shift_time, SF_TIME);
+       /* Convert to 1.x * power(2,y) */
+       if (!val)
+               return -EINVAL;
+       y = ilog2(val);
+       /* x = (val - (1 << y)) >> (y - 2); */
+       x = (val - (1ul << y)) << x_w >> y;
+
+       rxy = REG_FIELD_PREP(PKG_PWR_LIM_1_TIME_X, x) | REG_FIELD_PREP(PKG_PWR_LIM_1_TIME_Y, y);
+
+       hwm_locked_with_pm_intel_uncore_rmw(ddat, hwmon->rg.pkg_rapl_limit,
+                                           PKG_PWR_LIM_1_TIME, rxy);
+       return count;
+}
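
The inverse mapping used by the store path above, condensed into a standalone sketch; encode_tau() is illustrative, and the rounding and 0xa time shift mirror the code rather than being authoritative:

#include <stdint.h>
#include <stdio.h>

/* Encode a window in milliseconds into the '1.x * power(2,y)' form. */
static void encode_tau(uint64_t ms, unsigned int scl_shift_time,
		       uint32_t *x, uint32_t *y)
{
	const unsigned int x_w = 2;
	uint64_t hw = ((ms << scl_shift_time) + 500) / 1000;  /* hw units */

	*y = 63 - __builtin_clzll(hw);                 /* ilog2(hw), hw != 0 */
	*x = (uint32_t)(((hw - (1ull << *y)) << x_w) >> *y);
}

int main(void)
{
	uint32_t x, y;

	encode_tau(256000, 0xa, &x, &y);       /* the 256 s max window */
	printf("x=%u y=0x%x\n", x, y);         /* expect x=0 y=0x12 */
	return 0;
}
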
+
+static SENSOR_DEVICE_ATTR(power1_max_interval, 0664,
+                         hwm_power1_max_interval_show,
+                         hwm_power1_max_interval_store, 0);
+
+static struct attribute *hwm_attributes[] = {
+       &sensor_dev_attr_power1_max_interval.dev_attr.attr,
+       NULL
+};
+
+static umode_t hwm_attributes_visible(struct kobject *kobj,
+                                     struct attribute *attr, int index)
+{
+       struct device *dev = kobj_to_dev(kobj);
+       struct hwm_drvdata *ddat = dev_get_drvdata(dev);
+       struct i915_hwmon *hwmon = ddat->hwmon;
+
+       if (attr == &sensor_dev_attr_power1_max_interval.dev_attr.attr)
+               return i915_mmio_reg_valid(hwmon->rg.pkg_rapl_limit) ? attr->mode : 0;
+
+       return 0;
+}
+
+static const struct attribute_group hwm_attrgroup = {
+       .attrs = hwm_attributes,
+       .is_visible = hwm_attributes_visible,
+};
+
+static const struct attribute_group *hwm_groups[] = {
+       &hwm_attrgroup,
+       NULL
+};
+
+static const struct hwmon_channel_info *hwm_info[] = {
+       HWMON_CHANNEL_INFO(in, HWMON_I_INPUT),
+       HWMON_CHANNEL_INFO(power, HWMON_P_MAX | HWMON_P_RATED_MAX | HWMON_P_CRIT),
+       HWMON_CHANNEL_INFO(energy, HWMON_E_INPUT),
+       HWMON_CHANNEL_INFO(curr, HWMON_C_CRIT),
+       NULL
+};
+
+static const struct hwmon_channel_info *hwm_gt_info[] = {
+       HWMON_CHANNEL_INFO(energy, HWMON_E_INPUT),
+       NULL
+};
+
+/* I1 is exposed as power_crit or as curr_crit depending on bit 31 */
+static int hwm_pcode_read_i1(struct drm_i915_private *i915, u32 *uval)
+{
+       return snb_pcode_read_p(&i915->uncore, PCODE_POWER_SETUP,
+                               POWER_SETUP_SUBCOMMAND_READ_I1, 0, uval);
+}
+
+static int hwm_pcode_write_i1(struct drm_i915_private *i915, u32 uval)
+{
+       return  snb_pcode_write_p(&i915->uncore, PCODE_POWER_SETUP,
+                                 POWER_SETUP_SUBCOMMAND_WRITE_I1, 0, uval);
+}
+
+static umode_t
+hwm_in_is_visible(const struct hwm_drvdata *ddat, u32 attr)
+{
+       struct drm_i915_private *i915 = ddat->uncore->i915;
+
+       switch (attr) {
+       case hwmon_in_input:
+               return IS_DG1(i915) || IS_DG2(i915) ? 0444 : 0;
+       default:
+               return 0;
+       }
+}
+
+static int
+hwm_in_read(struct hwm_drvdata *ddat, u32 attr, long *val)
+{
+       struct i915_hwmon *hwmon = ddat->hwmon;
+       intel_wakeref_t wakeref;
+       u32 reg_value;
+
+       switch (attr) {
+       case hwmon_in_input:
+               with_intel_runtime_pm(ddat->uncore->rpm, wakeref)
+                       reg_value = intel_uncore_read(ddat->uncore, hwmon->rg.gt_perf_status);
+               /* HW register value in units of 2.5 millivolt */
+               *val = DIV_ROUND_CLOSEST(REG_FIELD_GET(GEN12_VOLTAGE_MASK, reg_value) * 25, 10);
+               return 0;
+       default:
+               return -EOPNOTSUPP;
+       }
+}
+
+static umode_t
+hwm_power_is_visible(const struct hwm_drvdata *ddat, u32 attr, int chan)
+{
+       struct drm_i915_private *i915 = ddat->uncore->i915;
+       struct i915_hwmon *hwmon = ddat->hwmon;
+       u32 uval;
+
+       switch (attr) {
+       case hwmon_power_max:
+               return i915_mmio_reg_valid(hwmon->rg.pkg_rapl_limit) ? 0664 : 0;
+       case hwmon_power_rated_max:
+               return i915_mmio_reg_valid(hwmon->rg.pkg_power_sku) ? 0444 : 0;
+       case hwmon_power_crit:
+               return (hwm_pcode_read_i1(i915, &uval) ||
+                       !(uval & POWER_SETUP_I1_WATTS)) ? 0 : 0644;
+       default:
+               return 0;
+       }
+}
+
+static int
+hwm_power_read(struct hwm_drvdata *ddat, u32 attr, int chan, long *val)
+{
+       struct i915_hwmon *hwmon = ddat->hwmon;
+       int ret;
+       u32 uval;
+
+       switch (attr) {
+       case hwmon_power_max:
+               *val = hwm_field_read_and_scale(ddat,
+                                               hwmon->rg.pkg_rapl_limit,
+                                               PKG_PWR_LIM_1,
+                                               hwmon->scl_shift_power,
+                                               SF_POWER);
+               return 0;
+       case hwmon_power_rated_max:
+               *val = hwm_field_read_and_scale(ddat,
+                                               hwmon->rg.pkg_power_sku,
+                                               PKG_PKG_TDP,
+                                               hwmon->scl_shift_power,
+                                               SF_POWER);
+               return 0;
+       case hwmon_power_crit:
+               ret = hwm_pcode_read_i1(ddat->uncore->i915, &uval);
+               if (ret)
+                       return ret;
+               if (!(uval & POWER_SETUP_I1_WATTS))
+                       return -ENODEV;
+               *val = mul_u64_u32_shr(REG_FIELD_GET(POWER_SETUP_I1_DATA_MASK, uval),
+                                      SF_POWER, POWER_SETUP_I1_SHIFT);
+               return 0;
+       default:
+               return -EOPNOTSUPP;
+       }
+}
+
+static int
+hwm_power_write(struct hwm_drvdata *ddat, u32 attr, int chan, long val)
+{
+       struct i915_hwmon *hwmon = ddat->hwmon;
+       u32 uval;
+
+       switch (attr) {
+       case hwmon_power_max:
+               hwm_field_scale_and_write(ddat,
+                                         hwmon->rg.pkg_rapl_limit,
+                                         hwmon->scl_shift_power,
+                                         SF_POWER, val);
+               return 0;
+       case hwmon_power_crit:
+               uval = DIV_ROUND_CLOSEST_ULL(val << POWER_SETUP_I1_SHIFT, SF_POWER);
+               return hwm_pcode_write_i1(ddat->uncore->i915, uval);
+       default:
+               return -EOPNOTSUPP;
+       }
+}
+
+static umode_t
+hwm_energy_is_visible(const struct hwm_drvdata *ddat, u32 attr)
+{
+       struct i915_hwmon *hwmon = ddat->hwmon;
+       i915_reg_t rgaddr;
+
+       switch (attr) {
+       case hwmon_energy_input:
+               if (ddat->gt_n >= 0)
+                       rgaddr = hwmon->rg.energy_status_tile;
+               else
+                       rgaddr = hwmon->rg.energy_status_all;
+               return i915_mmio_reg_valid(rgaddr) ? 0444 : 0;
+       default:
+               return 0;
+       }
+}
+
+static int
+hwm_energy_read(struct hwm_drvdata *ddat, u32 attr, long *val)
+{
+       switch (attr) {
+       case hwmon_energy_input:
+               hwm_energy(ddat, val);
+               return 0;
+       default:
+               return -EOPNOTSUPP;
+       }
+}
+
+static umode_t
+hwm_curr_is_visible(const struct hwm_drvdata *ddat, u32 attr)
+{
+       struct drm_i915_private *i915 = ddat->uncore->i915;
+       u32 uval;
+
+       switch (attr) {
+       case hwmon_curr_crit:
+               return (hwm_pcode_read_i1(i915, &uval) ||
+                       (uval & POWER_SETUP_I1_WATTS)) ? 0 : 0644;
+       default:
+               return 0;
+       }
+}
+
+static int
+hwm_curr_read(struct hwm_drvdata *ddat, u32 attr, long *val)
+{
+       int ret;
+       u32 uval;
+
+       switch (attr) {
+       case hwmon_curr_crit:
+               ret = hwm_pcode_read_i1(ddat->uncore->i915, &uval);
+               if (ret)
+                       return ret;
+               if (uval & POWER_SETUP_I1_WATTS)
+                       return -ENODEV;
+               *val = mul_u64_u32_shr(REG_FIELD_GET(POWER_SETUP_I1_DATA_MASK, uval),
+                                      SF_CURR, POWER_SETUP_I1_SHIFT);
+               return 0;
+       default:
+               return -EOPNOTSUPP;
+       }
+}
+
+static int
+hwm_curr_write(struct hwm_drvdata *ddat, u32 attr, long val)
+{
+       u32 uval;
+
+       switch (attr) {
+       case hwmon_curr_crit:
+               uval = DIV_ROUND_CLOSEST_ULL(val << POWER_SETUP_I1_SHIFT, SF_CURR);
+               return hwm_pcode_write_i1(ddat->uncore->i915, uval);
+       default:
+               return -EOPNOTSUPP;
+       }
+}
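
How a raw I1 value maps onto the two hwmon attributes, as a standalone sketch. Bit 31 (the POWER_SETUP_I1_WATTS flag) decides whether the value is exposed as power1_crit or curr1_crit; the data mask width and the 6 fractional bits below are assumptions standing in for POWER_SETUP_I1_DATA_MASK and POWER_SETUP_I1_SHIFT:

#include <stdint.h>
#include <stdio.h>

#define I1_WATTS_FLAG   (1u << 31)
#define I1_DATA_MASK    0x7fffu       /* assumed width of the data field */
#define I1_SHIFT        6             /* assumed fractional bits */

int main(void)
{
	uint32_t uval = 0x140;                        /* example raw value */
	uint64_t data = uval & I1_DATA_MASK;

	if (uval & I1_WATTS_FLAG)
		printf("power1_crit = %llu uW\n",
		       (unsigned long long)(data * 1000000 >> I1_SHIFT));
	else
		printf("curr1_crit = %llu mA\n",
		       (unsigned long long)(data * 1000 >> I1_SHIFT));
	return 0;
}
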
+
+static umode_t
+hwm_is_visible(const void *drvdata, enum hwmon_sensor_types type,
+              u32 attr, int channel)
+{
+       struct hwm_drvdata *ddat = (struct hwm_drvdata *)drvdata;
+
+       switch (type) {
+       case hwmon_in:
+               return hwm_in_is_visible(ddat, attr);
+       case hwmon_power:
+               return hwm_power_is_visible(ddat, attr, channel);
+       case hwmon_energy:
+               return hwm_energy_is_visible(ddat, attr);
+       case hwmon_curr:
+               return hwm_curr_is_visible(ddat, attr);
+       default:
+               return 0;
+       }
+}
+
+static int
+hwm_read(struct device *dev, enum hwmon_sensor_types type, u32 attr,
+        int channel, long *val)
+{
+       struct hwm_drvdata *ddat = dev_get_drvdata(dev);
+
+       switch (type) {
+       case hwmon_in:
+               return hwm_in_read(ddat, attr, val);
+       case hwmon_power:
+               return hwm_power_read(ddat, attr, channel, val);
+       case hwmon_energy:
+               return hwm_energy_read(ddat, attr, val);
+       case hwmon_curr:
+               return hwm_curr_read(ddat, attr, val);
+       default:
+               return -EOPNOTSUPP;
+       }
+}
+
+static int
+hwm_write(struct device *dev, enum hwmon_sensor_types type, u32 attr,
+         int channel, long val)
+{
+       struct hwm_drvdata *ddat = dev_get_drvdata(dev);
+
+       switch (type) {
+       case hwmon_power:
+               return hwm_power_write(ddat, attr, channel, val);
+       case hwmon_curr:
+               return hwm_curr_write(ddat, attr, val);
+       default:
+               return -EOPNOTSUPP;
+       }
+}
+
+static const struct hwmon_ops hwm_ops = {
+       .is_visible = hwm_is_visible,
+       .read = hwm_read,
+       .write = hwm_write,
+};
+
+static const struct hwmon_chip_info hwm_chip_info = {
+       .ops = &hwm_ops,
+       .info = hwm_info,
+};
+
+static umode_t
+hwm_gt_is_visible(const void *drvdata, enum hwmon_sensor_types type,
+                 u32 attr, int channel)
+{
+       struct hwm_drvdata *ddat = (struct hwm_drvdata *)drvdata;
+
+       switch (type) {
+       case hwmon_energy:
+               return hwm_energy_is_visible(ddat, attr);
+       default:
+               return 0;
+       }
+}
+
+static int
+hwm_gt_read(struct device *dev, enum hwmon_sensor_types type, u32 attr,
+           int channel, long *val)
+{
+       struct hwm_drvdata *ddat = dev_get_drvdata(dev);
+
+       switch (type) {
+       case hwmon_energy:
+               return hwm_energy_read(ddat, attr, val);
+       default:
+               return -EOPNOTSUPP;
+       }
+}
+
+static const struct hwmon_ops hwm_gt_ops = {
+       .is_visible = hwm_gt_is_visible,
+       .read = hwm_gt_read,
+};
+
+static const struct hwmon_chip_info hwm_gt_chip_info = {
+       .ops = &hwm_gt_ops,
+       .info = hwm_gt_info,
+};
+
+static void
+hwm_get_preregistration_info(struct drm_i915_private *i915)
+{
+       struct i915_hwmon *hwmon = i915->hwmon;
+       struct intel_uncore *uncore = &i915->uncore;
+       struct hwm_drvdata *ddat = &hwmon->ddat;
+       intel_wakeref_t wakeref;
+       u32 val_sku_unit = 0;
+       struct intel_gt *gt;
+       long energy;
+       int i;
+
+       /* Available for all Gen12+/dGfx */
+       hwmon->rg.gt_perf_status = GEN12_RPSTAT1;
+
+       if (IS_DG1(i915) || IS_DG2(i915)) {
+               hwmon->rg.pkg_power_sku_unit = PCU_PACKAGE_POWER_SKU_UNIT;
+               hwmon->rg.pkg_power_sku = PCU_PACKAGE_POWER_SKU;
+               hwmon->rg.pkg_rapl_limit = PCU_PACKAGE_RAPL_LIMIT;
+               hwmon->rg.energy_status_all = PCU_PACKAGE_ENERGY_STATUS;
+               hwmon->rg.energy_status_tile = INVALID_MMIO_REG;
+       } else if (IS_XEHPSDV(i915)) {
+               hwmon->rg.pkg_power_sku_unit = GT0_PACKAGE_POWER_SKU_UNIT;
+               hwmon->rg.pkg_power_sku = INVALID_MMIO_REG;
+               hwmon->rg.pkg_rapl_limit = GT0_PACKAGE_RAPL_LIMIT;
+               hwmon->rg.energy_status_all = GT0_PLATFORM_ENERGY_STATUS;
+               hwmon->rg.energy_status_tile = GT0_PACKAGE_ENERGY_STATUS;
+       } else {
+               hwmon->rg.pkg_power_sku_unit = INVALID_MMIO_REG;
+               hwmon->rg.pkg_power_sku = INVALID_MMIO_REG;
+               hwmon->rg.pkg_rapl_limit = INVALID_MMIO_REG;
+               hwmon->rg.energy_status_all = INVALID_MMIO_REG;
+               hwmon->rg.energy_status_tile = INVALID_MMIO_REG;
+       }
+
+       with_intel_runtime_pm(uncore->rpm, wakeref) {
+               /*
+                * The contents of register hwmon->rg.pkg_power_sku_unit do not change,
+                * so read it once and store the shift values.
+                */
+               if (i915_mmio_reg_valid(hwmon->rg.pkg_power_sku_unit))
+                       val_sku_unit = intel_uncore_read(uncore,
+                                                        hwmon->rg.pkg_power_sku_unit);
+       }
+
+       hwmon->scl_shift_power = REG_FIELD_GET(PKG_PWR_UNIT, val_sku_unit);
+       hwmon->scl_shift_energy = REG_FIELD_GET(PKG_ENERGY_UNIT, val_sku_unit);
+       hwmon->scl_shift_time = REG_FIELD_GET(PKG_TIME_UNIT, val_sku_unit);
+
+       /*
+        * Initialize 'struct hwm_energy_info', i.e. set fields to the
+        * first value of the energy register read
+        */
+       if (i915_mmio_reg_valid(hwmon->rg.energy_status_all))
+               hwm_energy(ddat, &energy);
+       if (i915_mmio_reg_valid(hwmon->rg.energy_status_tile)) {
+               for_each_gt(gt, i915, i)
+                       hwm_energy(&hwmon->ddat_gt[i], &energy);
+       }
+}
+
+void i915_hwmon_register(struct drm_i915_private *i915)
+{
+       struct device *dev = i915->drm.dev;
+       struct i915_hwmon *hwmon;
+       struct device *hwmon_dev;
+       struct hwm_drvdata *ddat;
+       struct hwm_drvdata *ddat_gt;
+       struct intel_gt *gt;
+       int i;
+
+       /* hwmon is available only for dGfx */
+       if (!IS_DGFX(i915))
+               return;
+
+       hwmon = devm_kzalloc(dev, sizeof(*hwmon), GFP_KERNEL);
+       if (!hwmon)
+               return;
+
+       i915->hwmon = hwmon;
+       mutex_init(&hwmon->hwmon_lock);
+       ddat = &hwmon->ddat;
+
+       ddat->hwmon = hwmon;
+       ddat->uncore = &i915->uncore;
+       snprintf(ddat->name, sizeof(ddat->name), "i915");
+       ddat->gt_n = -1;
+
+       for_each_gt(gt, i915, i) {
+               ddat_gt = hwmon->ddat_gt + i;
+
+               ddat_gt->hwmon = hwmon;
+               ddat_gt->uncore = gt->uncore;
+               snprintf(ddat_gt->name, sizeof(ddat_gt->name), "i915_gt%u", i);
+               ddat_gt->gt_n = i;
+       }
+
+       hwm_get_preregistration_info(i915);
+
+       /*  hwmon_dev points to device hwmon<i> */
+       hwmon_dev = devm_hwmon_device_register_with_info(dev, ddat->name,
+                                                        ddat,
+                                                        &hwm_chip_info,
+                                                        hwm_groups);
+       if (IS_ERR(hwmon_dev)) {
+               i915->hwmon = NULL;
+               return;
+       }
+
+       ddat->hwmon_dev = hwmon_dev;
+
+       for_each_gt(gt, i915, i) {
+               ddat_gt = hwmon->ddat_gt + i;
+               /*
+                * Create per-gt directories only if a per-gt attribute is
+                * visible. Currently this is only energy
+                */
+               if (!hwm_gt_is_visible(ddat_gt, hwmon_energy, hwmon_energy_input, 0))
+                       continue;
+
+               hwmon_dev = devm_hwmon_device_register_with_info(dev, ddat_gt->name,
+                                                                ddat_gt,
+                                                                &hwm_gt_chip_info,
+                                                                NULL);
+               if (!IS_ERR(hwmon_dev))
+                       ddat_gt->hwmon_dev = hwmon_dev;
+       }
+}
+
+void i915_hwmon_unregister(struct drm_i915_private *i915)
+{
+       fetch_and_zero(&i915->hwmon);
+}
diff --git a/drivers/gpu/drm/i915/i915_hwmon.h b/drivers/gpu/drm/i915/i915_hwmon.h
new file mode 100644 (file)
index 0000000..7ca9cf2
--- /dev/null
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: MIT */
+
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef __I915_HWMON_H__
+#define __I915_HWMON_H__
+
+struct drm_i915_private;
+
+#if IS_REACHABLE(CONFIG_HWMON)
+void i915_hwmon_register(struct drm_i915_private *i915);
+void i915_hwmon_unregister(struct drm_i915_private *i915);
+#else
+static inline void i915_hwmon_register(struct drm_i915_private *i915) { };
+static inline void i915_hwmon_unregister(struct drm_i915_private *i915) { };
+#endif
+
+#endif /* __I915_HWMON_H__ */
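
What the new node looks like from userspace, as a hedged sketch: the hwmon index is an example (discover the right one via the 'name' attribute), attribute availability varies by platform, and per the hwmon ABI energy1_input is in microjoules and power1_max in microwatts:

#include <stdio.h>

static unsigned long long read_ull(const char *path)
{
	unsigned long long v = 0;
	FILE *f = fopen(path, "r");

	if (f) {
		if (fscanf(f, "%llu", &v) != 1)
			v = 0;
		fclose(f);
	}
	return v;
}

int main(void)
{
	/* hwmon2 is an example index, not guaranteed */
	printf("energy: %llu uJ\n",
	       read_ull("/sys/class/hwmon/hwmon2/energy1_input"));
	printf("power limit: %llu uW\n",
	       read_ull("/sys/class/hwmon/hwmon2/power1_max"));
	return 0;
}
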
index 9486127a44f7945957505d992addae22fd4b9dfa..211913be40cec6b4f83d58f2406eed15ef88b7a4 100644 (file)
@@ -1023,6 +1023,8 @@ static const struct intel_device_info adl_p_info = {
        .has_logical_ring_contexts = 1, \
        .has_logical_ring_elsq = 1, \
        .has_mslice_steering = 1, \
+       .has_oa_bpc_reporting = 1, \
+       .has_oa_slice_contrib_limits = 1, \
        .has_rc6 = 1, \
        .has_reset_engine = 1, \
        .has_rps = 1, \
@@ -1042,7 +1044,6 @@ static const struct intel_device_info xehpsdv_info = {
        PLATFORM(INTEL_XEHPSDV),
        NO_DISPLAY,
        .has_64k_pages = 1,
-       .needs_compact_pt = 1,
        .has_media_ratio_mode = 1,
        .__runtime.platform_engine_mask =
                BIT(RCS0) | BIT(BCS0) |
@@ -1064,7 +1065,6 @@ static const struct intel_device_info xehpsdv_info = {
        .has_64k_pages = 1, \
        .has_guc_deprivilege = 1, \
        .has_heci_pxp = 1, \
-       .needs_compact_pt = 1, \
        .has_media_ratio_mode = 1, \
        .display.has_cdclk_squash = 1, \
        .__runtime.platform_engine_mask = \
@@ -1146,6 +1146,7 @@ static const struct intel_device_info mtl_info = {
        .extra_gt_list = xelpmp_extra_gt,
        .has_flat_ccs = 0,
        .has_gmd_id = 1,
+       .has_mslice_steering = 0,
        .has_snoop = 1,
        .__runtime.memory_regions = REGION_SMEM | REGION_STOLEN_LMEM,
        .__runtime.platform_engine_mask = BIT(RCS0) | BIT(BCS0) | BIT(CCS0),
@@ -1298,9 +1299,7 @@ bool i915_pci_resource_valid(struct pci_dev *pdev, int bar)
 
 static bool intel_mmio_bar_valid(struct pci_dev *pdev, struct intel_device_info *intel_info)
 {
-       int gttmmaddr_bar = intel_info->__runtime.graphics.ip.ver == 2 ? GEN2_GTTMMADR_BAR : GTTMMADR_BAR;
-
-       return i915_pci_resource_valid(pdev, gttmmaddr_bar);
+       return i915_pci_resource_valid(pdev, intel_mmio_bar(intel_info->__runtime.graphics.ip.ver));
 }
 
 static int i915_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
index 0defbb43ceea8365f772d41681f9490df1b869fb..0dd597a7a11f509dd2b79f4e513b61251cfd63d5 100644 (file)
 #include "gt/intel_gpu_commands.h"
 #include "gt/intel_gt.h"
 #include "gt/intel_gt_clock_utils.h"
+#include "gt/intel_gt_mcr.h"
 #include "gt/intel_gt_regs.h"
 #include "gt/intel_lrc.h"
 #include "gt/intel_lrc_reg.h"
 #include "gt/intel_ring.h"
+#include "gt/uc/intel_guc_slpc.h"
 
 #include "i915_drv.h"
 #include "i915_file_private.h"
@@ -286,6 +288,7 @@ static u32 i915_perf_stream_paranoid = true;
 #define OAREPORT_REASON_CTX_SWITCH     (1<<3)
 #define OAREPORT_REASON_CLK_RATIO      (1<<5)
 
+#define HAS_MI_SET_PREDICATE(i915) (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50))
 
 /* For sysctl proc_dointvec_minmax of i915_oa_max_sample_rate
  *
@@ -320,6 +323,8 @@ static const struct i915_oa_format oa_formats[I915_OA_FORMAT_MAX] = {
        [I915_OA_FORMAT_A12]                = { 0, 64 },
        [I915_OA_FORMAT_A12_B8_C8]          = { 2, 128 },
        [I915_OA_FORMAT_A32u40_A4u32_B8_C8] = { 5, 256 },
+       [I915_OAR_FORMAT_A32u40_A4u32_B8_C8]    = { 5, 256 },
+       [I915_OA_FORMAT_A24u40_A14u32_B8_C8]    = { 5, 256 },
 };
 
 #define SAMPLE_OA_REPORT      (1<<0)
@@ -462,7 +467,7 @@ static u32 gen7_oa_hw_tail_read(struct i915_perf_stream *stream)
 static bool oa_buffer_check_unlocked(struct i915_perf_stream *stream)
 {
        u32 gtt_offset = i915_ggtt_offset(stream->oa_buffer.vma);
-       int report_size = stream->oa_buffer.format_size;
+       int report_size = stream->oa_buffer.format->size;
        unsigned long flags;
        bool pollin;
        u32 hw_tail;
@@ -599,7 +604,7 @@ static int append_oa_sample(struct i915_perf_stream *stream,
                            size_t *offset,
                            const u8 *report)
 {
-       int report_size = stream->oa_buffer.format_size;
+       int report_size = stream->oa_buffer.format->size;
        struct drm_i915_perf_record_header header;
 
        header.type = DRM_I915_PERF_RECORD_SAMPLE;
@@ -649,14 +654,13 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream,
                                  size_t *offset)
 {
        struct intel_uncore *uncore = stream->uncore;
-       int report_size = stream->oa_buffer.format_size;
+       int report_size = stream->oa_buffer.format->size;
        u8 *oa_buf_base = stream->oa_buffer.vaddr;
        u32 gtt_offset = i915_ggtt_offset(stream->oa_buffer.vma);
        u32 mask = (OA_BUFFER_SIZE - 1);
        size_t start_offset = *offset;
        unsigned long flags;
        u32 head, tail;
-       u32 taken;
        int ret = 0;
 
        if (drm_WARN_ON(&uncore->i915->drm, !stream->enabled))
@@ -692,7 +696,7 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream,
 
 
        for (/* none */;
-            (taken = OA_TAKEN(tail, head));
+            OA_TAKEN(tail, head);
             head = (head + report_size) & mask) {
                u8 *report = oa_buf_base + head;
                u32 *report32 = (void *)report;
@@ -774,7 +778,7 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream,
                 * switches since it's not-uncommon for periodic samples to
                 * identify a switch before any 'context switch' report.
                 */
-               if (!stream->perf->exclusive_stream->ctx ||
+               if (!stream->ctx ||
                    stream->specific_ctx_id == ctx_id ||
                    stream->oa_buffer.last_ctx_id == stream->specific_ctx_id ||
                    reason & OAREPORT_REASON_CTX_SWITCH) {
@@ -783,7 +787,7 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream,
                         * While filtering for a single context we avoid
                         * leaking the IDs of other contexts.
                         */
-                       if (stream->perf->exclusive_stream->ctx &&
+                       if (stream->ctx &&
                            stream->specific_ctx_id != ctx_id) {
                                report32[2] = INVALID_CTX_ID;
                        }
@@ -943,14 +947,13 @@ static int gen7_append_oa_reports(struct i915_perf_stream *stream,
                                  size_t *offset)
 {
        struct intel_uncore *uncore = stream->uncore;
-       int report_size = stream->oa_buffer.format_size;
+       int report_size = stream->oa_buffer.format->size;
        u8 *oa_buf_base = stream->oa_buffer.vaddr;
        u32 gtt_offset = i915_ggtt_offset(stream->oa_buffer.vma);
        u32 mask = (OA_BUFFER_SIZE - 1);
        size_t start_offset = *offset;
        unsigned long flags;
        u32 head, tail;
-       u32 taken;
        int ret = 0;
 
        if (drm_WARN_ON(&uncore->i915->drm, !stream->enabled))
@@ -984,7 +987,7 @@ static int gen7_append_oa_reports(struct i915_perf_stream *stream,
 
 
        for (/* none */;
-            (taken = OA_TAKEN(tail, head));
+            OA_TAKEN(tail, head);
             head = (head + report_size) & mask) {
                u8 *report = oa_buf_base + head;
                u32 *report32 = (void *)report;
@@ -1233,6 +1236,196 @@ retry:
        return stream->pinned_ctx;
 }
 
+static int
+__store_reg_to_mem(struct i915_request *rq, i915_reg_t reg, u32 ggtt_offset)
+{
+       u32 *cs, cmd;
+
+       cmd = MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT;
+       if (GRAPHICS_VER(rq->engine->i915) >= 8)
+               cmd++;
+
+       cs = intel_ring_begin(rq, 4);
+       if (IS_ERR(cs))
+               return PTR_ERR(cs);
+
+       *cs++ = cmd;
+       *cs++ = i915_mmio_reg_offset(reg);
+       *cs++ = ggtt_offset;
+       *cs++ = 0;
+
+       intel_ring_advance(rq, cs);
+
+       return 0;
+}
+
+static int
+__read_reg(struct intel_context *ce, i915_reg_t reg, u32 ggtt_offset)
+{
+       struct i915_request *rq;
+       int err;
+
+       rq = i915_request_create(ce);
+       if (IS_ERR(rq))
+               return PTR_ERR(rq);
+
+       i915_request_get(rq);
+
+       err = __store_reg_to_mem(rq, reg, ggtt_offset);
+
+       i915_request_add(rq);
+       if (!err && i915_request_wait(rq, 0, HZ / 2) < 0)
+               err = -ETIME;
+
+       i915_request_put(rq);
+
+       return err;
+}
+
+static int
+gen12_guc_sw_ctx_id(struct intel_context *ce, u32 *ctx_id)
+{
+       struct i915_vma *scratch;
+       u32 *val;
+       int err;
+
+       scratch = __vm_create_scratch_for_read_pinned(&ce->engine->gt->ggtt->vm, 4);
+       if (IS_ERR(scratch))
+               return PTR_ERR(scratch);
+
+       err = i915_vma_sync(scratch);
+       if (err)
+               goto err_scratch;
+
+       err = __read_reg(ce, RING_EXECLIST_STATUS_HI(ce->engine->mmio_base),
+                        i915_ggtt_offset(scratch));
+       if (err)
+               goto err_scratch;
+
+       val = i915_gem_object_pin_map_unlocked(scratch->obj, I915_MAP_WB);
+       if (IS_ERR(val)) {
+               err = PTR_ERR(val);
+               goto err_scratch;
+       }
+
+       *ctx_id = *val;
+       i915_gem_object_unpin_map(scratch->obj);
+
+err_scratch:
+       i915_vma_unpin_and_release(&scratch, 0);
+       return err;
+}
+
+/*
+ * For execlist mode of submission, pick an unused context id
+ * 0 - (NUM_CONTEXT_TAG - 1) are used by other contexts
+ * XXX_MAX_CONTEXT_HW_ID is used by idle context
+ *
+ * For GuC mode of submission read context id from the upper dword of the
+ * EXECLIST_STATUS register. Note that we read this value only once and expect
+ * that the value stays fixed for the entire OA use case. There are cases where
+ * the GuC KMD implementation may deregister a context to reuse its context id,
+ * but we prevent that from happening to the OA context by pinning it.
+ */
+static int gen12_get_render_context_id(struct i915_perf_stream *stream)
+{
+       u32 ctx_id, mask;
+       int ret;
+
+       if (intel_engine_uses_guc(stream->engine)) {
+               ret = gen12_guc_sw_ctx_id(stream->pinned_ctx, &ctx_id);
+               if (ret)
+                       return ret;
+
+               mask = ((1U << GEN12_GUC_SW_CTX_ID_WIDTH) - 1) <<
+                       (GEN12_GUC_SW_CTX_ID_SHIFT - 32);
+       } else if (GRAPHICS_VER_FULL(stream->engine->i915) >= IP_VER(12, 50)) {
+               ctx_id = (XEHP_MAX_CONTEXT_HW_ID - 1) <<
+                       (XEHP_SW_CTX_ID_SHIFT - 32);
+
+               mask = ((1U << XEHP_SW_CTX_ID_WIDTH) - 1) <<
+                       (XEHP_SW_CTX_ID_SHIFT - 32);
+       } else {
+               ctx_id = (GEN12_MAX_CONTEXT_HW_ID - 1) <<
+                        (GEN11_SW_CTX_ID_SHIFT - 32);
+
+               mask = ((1U << GEN11_SW_CTX_ID_WIDTH) - 1) <<
+                       (GEN11_SW_CTX_ID_SHIFT - 32);
+       }
+       stream->specific_ctx_id = ctx_id & mask;
+       stream->specific_ctx_id_mask = mask;
+
+       return 0;
+}
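
A standalone sketch of how the (id, mask) pair chosen here is later used to filter OA reports, mirroring the ctx_id handling in gen8_append_oa_reports(); the field width and shift below are example values, not the hardware layout:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Example field layout only: 11 id bits starting at bit 5 of the dword. */
#define EX_CTX_ID_WIDTH	11
#define EX_CTX_ID_SHIFT	5

int main(void)
{
	uint32_t mask = ((1u << EX_CTX_ID_WIDTH) - 1) << EX_CTX_ID_SHIFT;
	uint32_t specific_ctx_id = (0x7feu << EX_CTX_ID_SHIFT) & mask;
	uint32_t report_dword = 0xdeadbeef;   /* report32[2] in a sample */

	/* A report belongs to the stream when the masked ids match. */
	bool ours = (report_dword & mask) == specific_ctx_id;

	printf("mask=0x%08x id=0x%08x ours=%d\n", mask, specific_ctx_id, ours);
	return 0;
}
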
+
+static bool oa_find_reg_in_lri(u32 *state, u32 reg, u32 *offset, u32 end)
+{
+       u32 idx = *offset;
+       u32 len = min(MI_LRI_LEN(state[idx]) + idx, end);
+       bool found = false;
+
+       idx++;
+       for (; idx < len; idx += 2) {
+               if (state[idx] == reg) {
+                       found = true;
+                       break;
+               }
+       }
+
+       *offset = idx;
+       return found;
+}
+
+static u32 oa_context_image_offset(struct intel_context *ce, u32 reg)
+{
+       u32 offset, len = (ce->engine->context_size - PAGE_SIZE) / 4;
+       u32 *state = ce->lrc_reg_state;
+
+       for (offset = 0; offset < len; ) {
+               if (IS_MI_LRI_CMD(state[offset])) {
+                       /*
+                        * We expect reg-value pairs in an MI_LRI command, so
+                        * MI_LRI_LEN() should be even; if not, issue a warning.
+                        */
+                       drm_WARN_ON(&ce->engine->i915->drm,
+                                   MI_LRI_LEN(state[offset]) & 0x1);
+
+                       if (oa_find_reg_in_lri(state, reg, &offset, len))
+                               break;
+               } else {
+                       offset++;
+               }
+       }
+
+       return offset < len ? offset : U32_MAX;
+}
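
The same scan condensed into a standalone sketch over a toy context image; the header test and length decode (is_lri()/lri_len()) are stand-ins, not the real MI_LOAD_REGISTER_IMM encoding:

#include <stdint.h>
#include <stdio.h>

#define TOY_LRI_HDR	0x11000000u	/* stand-in opcode, not the HW one */

static int is_lri(uint32_t dw)       { return (dw & 0xff000000u) == TOY_LRI_HDR; }
static uint32_t lri_len(uint32_t dw) { return dw & 0xffu; }  /* payload dwords */

/* Return the dword offset of 'reg' inside the image, or UINT32_MAX. */
static uint32_t find_reg(const uint32_t *state, uint32_t len, uint32_t reg)
{
	for (uint32_t i = 0; i < len; ) {
		if (is_lri(state[i])) {
			uint32_t end = i + 1 + lri_len(state[i]);

			/* header, then (reg, value) pairs */
			for (i = i + 1; i < end && i < len; i += 2)
				if (state[i] == reg)
					return i;
		} else {
			i++;
		}
	}
	return UINT32_MAX;
}

int main(void)
{
	uint32_t image[] = { TOY_LRI_HDR | 4, 0x2244, 0x0, 0x2248, 0x1 };

	printf("offset = %u\n", find_reg(image, 5, 0x2248));  /* prints 3 */
	return 0;
}
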
+
+static int set_oa_ctx_ctrl_offset(struct intel_context *ce)
+{
+       i915_reg_t reg = GEN12_OACTXCONTROL(ce->engine->mmio_base);
+       struct i915_perf *perf = &ce->engine->i915->perf;
+       u32 offset = perf->ctx_oactxctrl_offset;
+
+       /* Do this only once. Failure is stored as offset of U32_MAX */
+       if (offset)
+               goto exit;
+
+       offset = oa_context_image_offset(ce, i915_mmio_reg_offset(reg));
+       perf->ctx_oactxctrl_offset = offset;
+
+       drm_dbg(&ce->engine->i915->drm,
+               "%s oa ctx control at 0x%08x dword offset\n",
+               ce->engine->name, offset);
+
+exit:
+       return offset && offset != U32_MAX ? 0 : -ENODEV;
+}
+
+static bool engine_supports_mi_query(struct intel_engine_cs *engine)
+{
+       return engine->class == RENDER_CLASS;
+}
+
 /**
  * oa_get_render_ctx_id - determine and hold ctx hw id
  * @stream: An i915-perf stream opened for OA metrics
@@ -1246,11 +1439,27 @@ retry:
 static int oa_get_render_ctx_id(struct i915_perf_stream *stream)
 {
        struct intel_context *ce;
+       int ret = 0;
 
        ce = oa_pin_context(stream);
        if (IS_ERR(ce))
                return PTR_ERR(ce);
 
+       if (engine_supports_mi_query(stream->engine)) {
+               /*
+                * We are enabling perf query here. If we don't find the
+                * context offset, just return an error.
+                */
+               ret = set_oa_ctx_ctrl_offset(ce);
+               if (ret) {
+                       intel_context_unpin(ce);
+                       drm_err(&stream->perf->i915->drm,
+                               "Enabling perf query failed for %s\n",
+                               stream->engine->name);
+                       return ret;
+               }
+       }
+
        switch (GRAPHICS_VER(ce->engine->i915)) {
        case 7: {
                /*
@@ -1292,24 +1501,7 @@ static int oa_get_render_ctx_id(struct i915_perf_stream *stream)
 
        case 11:
        case 12:
-               if (GRAPHICS_VER_FULL(ce->engine->i915) >= IP_VER(12, 50)) {
-                       stream->specific_ctx_id_mask =
-                               ((1U << XEHP_SW_CTX_ID_WIDTH) - 1) <<
-                               (XEHP_SW_CTX_ID_SHIFT - 32);
-                       stream->specific_ctx_id =
-                               (XEHP_MAX_CONTEXT_HW_ID - 1) <<
-                               (XEHP_SW_CTX_ID_SHIFT - 32);
-               } else {
-                       stream->specific_ctx_id_mask =
-                               ((1U << GEN11_SW_CTX_ID_WIDTH) - 1) << (GEN11_SW_CTX_ID_SHIFT - 32);
-                       /*
-                        * Pick an unused context id
-                        * 0 - BITS_PER_LONG are used by other contexts
-                        * GEN12_MAX_CONTEXT_HW_ID (0x7ff) is used by idle context
-                        */
-                       stream->specific_ctx_id =
-                               (GEN12_MAX_CONTEXT_HW_ID - 1) << (GEN11_SW_CTX_ID_SHIFT - 32);
-               }
+               ret = gen12_get_render_context_id(stream);
                break;
 
        default:
@@ -1323,7 +1515,7 @@ static int oa_get_render_ctx_id(struct i915_perf_stream *stream)
                stream->specific_ctx_id,
                stream->specific_ctx_id_mask);
 
-       return 0;
+       return ret;
 }
 
 /**
@@ -1375,8 +1567,9 @@ free_noa_wait(struct i915_perf_stream *stream)
 static void i915_oa_stream_destroy(struct i915_perf_stream *stream)
 {
        struct i915_perf *perf = stream->perf;
+       struct intel_gt *gt = stream->engine->gt;
 
-       if (WARN_ON(stream != perf->exclusive_stream))
+       if (WARN_ON(stream != gt->perf.exclusive_stream))
                return;
 
        /*
@@ -1385,11 +1578,20 @@ static void i915_oa_stream_destroy(struct i915_perf_stream *stream)
         *
         * See i915_oa_init_reg_state() and lrc_configure_all_contexts()
         */
-       WRITE_ONCE(perf->exclusive_stream, NULL);
+       WRITE_ONCE(gt->perf.exclusive_stream, NULL);
        perf->ops.disable_metric_set(stream);
 
        free_oa_buffer(stream);
 
+       /*
+        * Wa_16011777198:dg2: Unset the override of GUCRC mode to enable rc6.
+        */
+       if (intel_uc_uses_guc_rc(&gt->uc) &&
+           (IS_DG2_GRAPHICS_STEP(gt->i915, G10, STEP_A0, STEP_C0) ||
+            IS_DG2_GRAPHICS_STEP(gt->i915, G11, STEP_A0, STEP_B0)))
+               drm_WARN_ON(&gt->i915->drm,
+                           intel_guc_slpc_unset_gucrc_mode(&gt->uc.guc.slpc));
+
        intel_uncore_forcewake_put(stream->uncore, FORCEWAKE_ALL);
        intel_engine_pm_put(stream->engine);
 
@@ -1563,6 +1765,7 @@ static void gen12_init_oa_buffer(struct i915_perf_stream *stream)
 static int alloc_oa_buffer(struct i915_perf_stream *stream)
 {
        struct drm_i915_private *i915 = stream->perf->i915;
+       struct intel_gt *gt = stream->engine->gt;
        struct drm_i915_gem_object *bo;
        struct i915_vma *vma;
        int ret;
@@ -1582,11 +1785,22 @@ static int alloc_oa_buffer(struct i915_perf_stream *stream)
        i915_gem_object_set_cache_coherency(bo, I915_CACHE_LLC);
 
        /* PreHSW required 512K alignment, HSW requires 16M */
-       vma = i915_gem_object_ggtt_pin(bo, NULL, 0, SZ_16M, 0);
+       vma = i915_vma_instance(bo, &gt->ggtt->vm, NULL);
        if (IS_ERR(vma)) {
                ret = PTR_ERR(vma);
                goto err_unref;
        }
+
+       /*
+        * PreHSW required 512K alignment.
+        * HSW and onwards, align to requested size of OA buffer.
+        */
+       ret = i915_vma_pin(vma, 0, SZ_16M, PIN_GLOBAL | PIN_HIGH);
+       if (ret) {
+               drm_err(&gt->i915->drm, "Failed to pin OA buffer %d\n", ret);
+               goto err_unref;
+       }
+
        stream->oa_buffer.vma = vma;
 
        stream->oa_buffer.vaddr =
@@ -1636,6 +1850,7 @@ static u32 *save_restore_register(struct i915_perf_stream *stream, u32 *cs,
 static int alloc_noa_wait(struct i915_perf_stream *stream)
 {
        struct drm_i915_private *i915 = stream->perf->i915;
+       struct intel_gt *gt = stream->engine->gt;
        struct drm_i915_gem_object *bo;
        struct i915_vma *vma;
        const u64 delay_ticks = 0xffffffffffffffff -
@@ -1654,6 +1869,9 @@ static int alloc_noa_wait(struct i915_perf_stream *stream)
                DELTA_TARGET,
                N_CS_GPR
        };
+       i915_reg_t mi_predicate_result = HAS_MI_SET_PREDICATE(i915) ?
+                                         MI_PREDICATE_RESULT_2_ENGINE(base) :
+                                         MI_PREDICATE_RESULT_1(RENDER_RING_BASE);
 
        bo = i915_gem_object_create_internal(i915, 4096);
        if (IS_ERR(bo)) {
@@ -1673,12 +1891,16 @@ retry:
         * multiple OA config BOs will have a jump to this address and it
         * needs to be fixed during the lifetime of the i915/perf stream.
         */
-       vma = i915_gem_object_ggtt_pin_ww(bo, &ww, NULL, 0, 0, PIN_HIGH);
+       vma = i915_vma_instance(bo, &gt->ggtt->vm, NULL);
        if (IS_ERR(vma)) {
                ret = PTR_ERR(vma);
                goto out_ww;
        }
 
+       ret = i915_vma_pin_ww(vma, &ww, 0, 0, PIN_GLOBAL | PIN_HIGH);
+       if (ret)
+               goto out_ww;
+
        batch = cs = i915_gem_object_pin_map(bo, I915_MAP_WB);
        if (IS_ERR(batch)) {
                ret = PTR_ERR(batch);
@@ -1691,7 +1913,7 @@ retry:
                        stream, cs, true /* save */, CS_GPR(i),
                        INTEL_GT_SCRATCH_FIELD_PERF_CS_GPR + 8 * i, 2);
        cs = save_restore_register(
-               stream, cs, true /* save */, MI_PREDICATE_RESULT_1(RENDER_RING_BASE),
+               stream, cs, true /* save */, mi_predicate_result,
                INTEL_GT_SCRATCH_FIELD_PERF_PREDICATE_RESULT_1, 1);
 
        /* First timestamp snapshot location. */
@@ -1745,7 +1967,10 @@ retry:
         */
        *cs++ = MI_LOAD_REGISTER_REG | (3 - 2);
        *cs++ = i915_mmio_reg_offset(CS_GPR(JUMP_PREDICATE));
-       *cs++ = i915_mmio_reg_offset(MI_PREDICATE_RESULT_1(RENDER_RING_BASE));
+       *cs++ = i915_mmio_reg_offset(mi_predicate_result);
+
+       if (HAS_MI_SET_PREDICATE(i915))
+               *cs++ = MI_SET_PREDICATE | 1;
 
        /* Restart from the beginning if we had timestamps roll over. */
        *cs++ = (GRAPHICS_VER(i915) < 8 ?
@@ -1755,6 +1980,9 @@ retry:
        *cs++ = i915_ggtt_offset(vma) + (ts0 - batch) * 4;
        *cs++ = 0;
 
+       if (HAS_MI_SET_PREDICATE(i915))
+               *cs++ = MI_SET_PREDICATE;
+
        /*
         * Now add the diff between the two previous timestamps and add it to:
         *      ((1 << 64) - 1) - delay_ns
@@ -1782,7 +2010,10 @@ retry:
         */
        *cs++ = MI_LOAD_REGISTER_REG | (3 - 2);
        *cs++ = i915_mmio_reg_offset(CS_GPR(JUMP_PREDICATE));
-       *cs++ = i915_mmio_reg_offset(MI_PREDICATE_RESULT_1(RENDER_RING_BASE));
+       *cs++ = i915_mmio_reg_offset(mi_predicate_result);
+
+       if (HAS_MI_SET_PREDICATE(i915))
+               *cs++ = MI_SET_PREDICATE | 1;
 
        /* Predicate the jump.  */
        *cs++ = (GRAPHICS_VER(i915) < 8 ?
@@ -1792,13 +2023,16 @@ retry:
        *cs++ = i915_ggtt_offset(vma) + (jump - batch) * 4;
        *cs++ = 0;
 
+       if (HAS_MI_SET_PREDICATE(i915))
+               *cs++ = MI_SET_PREDICATE;
+
        /* Restore registers. */
        for (i = 0; i < N_CS_GPR; i++)
                cs = save_restore_register(
                        stream, cs, false /* restore */, CS_GPR(i),
                        INTEL_GT_SCRATCH_FIELD_PERF_CS_GPR + 8 * i, 2);
        cs = save_restore_register(
-               stream, cs, false /* restore */, MI_PREDICATE_RESULT_1(RENDER_RING_BASE),
+               stream, cs, false /* restore */, mi_predicate_result,
                INTEL_GT_SCRATCH_FIELD_PERF_PREDICATE_RESULT_1, 1);
 
        /* And return to the ring. */
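On platforms that advertise HAS_MI_SET_PREDICATE(), the predicated jump emitted above is bracketed by MI_SET_PREDICATE enable/disable, with the predicate result taken from the per-engine MI_PREDICATE_RESULT_2 instead of MI_PREDICATE_RESULT_1. A minimal sketch of just that bracketing pattern, using only macros already present in the patch; the helper name and the single-dword payload are illustrative, not part of the change:

static u32 *emit_predicated_dword(struct drm_i915_private *i915, u32 *cs, u32 cmd)
{
	if (HAS_MI_SET_PREDICATE(i915))
		*cs++ = MI_SET_PREDICATE | 1;	/* predicate the following commands */

	*cs++ = cmd;				/* e.g. a conditional MI_BATCH_BUFFER_START */

	if (HAS_MI_SET_PREDICATE(i915))
		*cs++ = MI_SET_PREDICATE;	/* stop predicating */

	return cs;
}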
@@ -2283,11 +2517,12 @@ static int gen12_configure_oar_context(struct i915_perf_stream *stream,
 {
        int err;
        struct intel_context *ce = stream->pinned_ctx;
-       u32 format = stream->oa_buffer.format;
+       u32 format = stream->oa_buffer.format->format;
+       u32 offset = stream->perf->ctx_oactxctrl_offset;
        struct flex regs_context[] = {
                {
                        GEN8_OACTXCONTROL,
-                       stream->perf->ctx_oactxctrl_offset + 1,
+                       offset + 1,
                        active ? GEN8_OA_COUNTER_RESUME : 0,
                },
        };
@@ -2312,12 +2547,13 @@ static int gen12_configure_oar_context(struct i915_perf_stream *stream,
                },
        };
 
-       /* Modify the context image of pinned context with regs_context*/
+       /* Modify the context image of pinned context with regs_context */
        err = intel_context_lock_pinned(ce);
        if (err)
                return err;
 
-       err = gen8_modify_context(ce, regs_context, ARRAY_SIZE(regs_context));
+       err = gen8_modify_context(ce, regs_context,
+                                 ARRAY_SIZE(regs_context));
        intel_context_unlock_pinned(ce);
        if (err)
                return err;
@@ -2359,10 +2595,11 @@ oa_configure_all_contexts(struct i915_perf_stream *stream,
 {
        struct drm_i915_private *i915 = stream->perf->i915;
        struct intel_engine_cs *engine;
+       struct intel_gt *gt = stream->engine->gt;
        struct i915_gem_context *ctx, *cn;
        int err;
 
-       lockdep_assert_held(&stream->perf->lock);
+       lockdep_assert_held(&gt->perf.lock);
 
        /*
         * The OA register config is setup through the context image. This image
@@ -2442,6 +2679,7 @@ lrc_configure_all_contexts(struct i915_perf_stream *stream,
                           const struct i915_oa_config *oa_config,
                           struct i915_active *active)
 {
+       u32 ctx_oactxctrl = stream->perf->ctx_oactxctrl_offset;
        /* The MMIO offsets for Flex EU registers aren't contiguous */
        const u32 ctx_flexeu0 = stream->perf->ctx_flexeu0_offset;
 #define ctx_flexeuN(N) (ctx_flexeu0 + 2 * (N) + 1)
@@ -2452,7 +2690,7 @@ lrc_configure_all_contexts(struct i915_perf_stream *stream,
                },
                {
                        GEN8_OACTXCONTROL,
-                       stream->perf->ctx_oactxctrl_offset + 1,
+                       ctx_oactxctrl + 1,
                },
                { EU_PERF_CNTL0, ctx_flexeuN(0) },
                { EU_PERF_CNTL1, ctx_flexeuN(1) },
@@ -2540,12 +2778,26 @@ static int
 gen12_enable_metric_set(struct i915_perf_stream *stream,
                        struct i915_active *active)
 {
+       struct drm_i915_private *i915 = stream->perf->i915;
        struct intel_uncore *uncore = stream->uncore;
        struct i915_oa_config *oa_config = stream->oa_config;
        bool periodic = stream->periodic;
        u32 period_exponent = stream->period_exponent;
+       u32 sqcnt1;
        int ret;
 
+       /*
+        * Wa_1508761755:xehpsdv, dg2
+        * EU NOA signals behave incorrectly if EU clock gating is enabled.
+        * Disable thread stall DOP gating and EU DOP gating.
+        */
+       if (IS_XEHPSDV(i915) || IS_DG2(i915)) {
+               intel_gt_mcr_multicast_write(uncore->gt, GEN8_ROW_CHICKEN,
+                                            _MASKED_BIT_ENABLE(STALL_DOP_GATING_DISABLE));
+               intel_uncore_write(uncore, GEN7_ROW_CHICKEN2,
+                                  _MASKED_BIT_ENABLE(GEN12_DISABLE_DOP_GATING));
+       }
+
        intel_uncore_write(uncore, GEN12_OAG_OA_DEBUG,
                           /* Disable clk ratio reports, like previous Gens. */
                           _MASKED_BIT_ENABLE(GEN12_OAG_OA_DEBUG_DISABLE_CLK_RATIO_REPORTS |
@@ -2562,6 +2814,16 @@ gen12_enable_metric_set(struct i915_perf_stream *stream,
                            (period_exponent << GEN12_OAG_OAGLBCTXCTRL_TIMER_PERIOD_SHIFT))
                            : 0);
 
+       /*
+        * Initialize Super Queue Internal Cnt Register
+        * Set PMON Enable in order to collect valid metrics.
+        * Enable bytes per clock reporting in OA for XEHPSDV onward.
+        */
+       sqcnt1 = GEN12_SQCNT1_PMON_ENABLE |
+                (HAS_OA_BPC_REPORTING(i915) ? GEN12_SQCNT1_OABPC : 0);
+
+       intel_uncore_rmw(uncore, GEN12_SQCNT1, 0, sqcnt1);
+
        /*
         * Update all contexts prior writing the mux configurations as we need
         * to make sure all slices/subslices are ON before writing to NOA
@@ -2611,6 +2873,19 @@ static void gen11_disable_metric_set(struct i915_perf_stream *stream)
 static void gen12_disable_metric_set(struct i915_perf_stream *stream)
 {
        struct intel_uncore *uncore = stream->uncore;
+       struct drm_i915_private *i915 = stream->perf->i915;
+       u32 sqcnt1;
+
+       /*
+        * Wa_1508761755:xehpsdv, dg2
+        * Enable thread stall DOP gating and EU DOP gating.
+        */
+       if (IS_XEHPSDV(i915) || IS_DG2(i915)) {
+               intel_gt_mcr_multicast_write(uncore->gt, GEN8_ROW_CHICKEN,
+                                            _MASKED_BIT_DISABLE(STALL_DOP_GATING_DISABLE));
+               intel_uncore_write(uncore, GEN7_ROW_CHICKEN2,
+                                  _MASKED_BIT_DISABLE(GEN12_DISABLE_DOP_GATING));
+       }
 
        /* Reset all contexts' slices/subslices configurations. */
        gen12_configure_all_contexts(stream, NULL, NULL);
@@ -2621,6 +2896,12 @@ static void gen12_disable_metric_set(struct i915_perf_stream *stream)
 
        /* Make sure we disable noa to save power. */
        intel_uncore_rmw(uncore, RPM_CONFIG1, GEN10_GT_NOA_ENABLE, 0);
+
+       sqcnt1 = GEN12_SQCNT1_PMON_ENABLE |
+                (HAS_OA_BPC_REPORTING(i915) ? GEN12_SQCNT1_OABPC : 0);
+
+       /* Reset PMON Enable to save power. */
+       intel_uncore_rmw(uncore, GEN12_SQCNT1, sqcnt1, 0);
 }
 
 static void gen7_oa_enable(struct i915_perf_stream *stream)
@@ -2630,7 +2911,7 @@ static void gen7_oa_enable(struct i915_perf_stream *stream)
        u32 ctx_id = stream->specific_ctx_id;
        bool periodic = stream->periodic;
        u32 period_exponent = stream->period_exponent;
-       u32 report_format = stream->oa_buffer.format;
+       u32 report_format = stream->oa_buffer.format->format;
 
        /*
         * Reset buf pointers so we don't forward reports from before now.
@@ -2656,7 +2937,7 @@ static void gen7_oa_enable(struct i915_perf_stream *stream)
 static void gen8_oa_enable(struct i915_perf_stream *stream)
 {
        struct intel_uncore *uncore = stream->uncore;
-       u32 report_format = stream->oa_buffer.format;
+       u32 report_format = stream->oa_buffer.format->format;
 
        /*
         * Reset buf pointers so we don't forward reports from before now.
@@ -2682,7 +2963,7 @@ static void gen8_oa_enable(struct i915_perf_stream *stream)
 static void gen12_oa_enable(struct i915_perf_stream *stream)
 {
        struct intel_uncore *uncore = stream->uncore;
-       u32 report_format = stream->oa_buffer.format;
+       u32 report_format = stream->oa_buffer.format->format;
 
        /*
         * If we don't want OA reports from the OA buffer, then we don't even
@@ -2838,6 +3119,30 @@ get_sseu_config(struct intel_sseu *out_sseu,
        return i915_gem_user_to_context_sseu(engine->gt, drm_sseu, out_sseu);
 }
 
+/*
+ * The OA timestamp frequency equals the CS timestamp frequency on most
+ * platforms. On some platforms the OA unit ignores CTC_SHIFT, so the two
+ * timestamps differ; in that case, return the adjusted CS timestamp
+ * frequency to the user.
+ */
+u32 i915_perf_oa_timestamp_frequency(struct drm_i915_private *i915)
+{
+       /* Wa_18013179988:dg2 */
+       if (IS_DG2(i915)) {
+               intel_wakeref_t wakeref;
+               u32 reg, shift;
+
+               with_intel_runtime_pm(to_gt(i915)->uncore->rpm, wakeref)
+                       reg = intel_uncore_read(to_gt(i915)->uncore, RPM_CONFIG0);
+
+               shift = REG_FIELD_GET(GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_MASK,
+                                     reg);
+
+               return to_gt(i915)->clock_frequency << (3 - shift);
+       }
+
+       return to_gt(i915)->clock_frequency;
+}
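A hedged usage sketch (not part of the patch) of the adjusted frequency returned above, converting a raw OA timestamp delta to nanoseconds; the helper name is hypothetical:

static u64 example_oa_ticks_to_ns(struct drm_i915_private *i915, u64 ticks)
{
	/* Scale ticks by NSEC_PER_SEC / OA frequency without overflowing. */
	return mul_u64_u32_div(ticks, NSEC_PER_SEC,
			       i915_perf_oa_timestamp_frequency(i915));
}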
+
 /**
  * i915_oa_stream_init - validate combined props for OA stream and init
  * @stream: An i915 perf stream
@@ -2862,7 +3167,7 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream,
 {
        struct drm_i915_private *i915 = stream->perf->i915;
        struct i915_perf *perf = stream->perf;
-       int format_size;
+       struct intel_gt *gt;
        int ret;
 
        if (!props->engine) {
@@ -2870,6 +3175,7 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream,
                        "OA engine not specified\n");
                return -EINVAL;
        }
+       gt = props->engine->gt;
 
        /*
         * If the sysfs metrics/ directory wasn't registered for some
@@ -2900,7 +3206,7 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream,
         * counter reports and marshal to the appropriate client
         * we currently only allow exclusive access
         */
-       if (perf->exclusive_stream) {
+       if (gt->perf.exclusive_stream) {
                drm_dbg(&stream->perf->i915->drm,
                        "OA unit already in use\n");
                return -EBUSY;
@@ -2917,20 +3223,15 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream,
 
        stream->sample_size = sizeof(struct drm_i915_perf_record_header);
 
-       format_size = perf->oa_formats[props->oa_format].size;
+       stream->oa_buffer.format = &perf->oa_formats[props->oa_format];
+       if (drm_WARN_ON(&i915->drm, stream->oa_buffer.format->size == 0))
+               return -EINVAL;
 
        stream->sample_flags = props->sample_flags;
-       stream->sample_size += format_size;
-
-       stream->oa_buffer.format_size = format_size;
-       if (drm_WARN_ON(&i915->drm, stream->oa_buffer.format_size == 0))
-               return -EINVAL;
+       stream->sample_size += stream->oa_buffer.format->size;
 
        stream->hold_preemption = props->hold_preemption;
 
-       stream->oa_buffer.format =
-               perf->oa_formats[props->oa_format].format;
-
        stream->periodic = props->oa_periodic;
        if (stream->periodic)
                stream->period_exponent = props->oa_period_exponent;
@@ -2974,14 +3275,31 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream,
        intel_engine_pm_get(stream->engine);
        intel_uncore_forcewake_get(stream->uncore, FORCEWAKE_ALL);
 
+       /*
+        * Wa_16011777198:dg2: GuC resets render as part of the Wa. This causes
+        * OA to lose the configuration state. Prevent this by overriding GUCRC
+        * mode.
+        */
+       if (intel_uc_uses_guc_rc(&gt->uc) &&
+           (IS_DG2_GRAPHICS_STEP(gt->i915, G10, STEP_A0, STEP_C0) ||
+            IS_DG2_GRAPHICS_STEP(gt->i915, G11, STEP_A0, STEP_B0))) {
+               ret = intel_guc_slpc_override_gucrc_mode(&gt->uc.guc.slpc,
+                                                        SLPC_GUCRC_MODE_GUCRC_NO_RC6);
+               if (ret) {
+                       drm_dbg(&stream->perf->i915->drm,
+                               "Unable to override gucrc mode\n");
+                       goto err_config;
+               }
+       }
+
        ret = alloc_oa_buffer(stream);
        if (ret)
                goto err_oa_buf_alloc;
 
        stream->ops = &i915_oa_stream_ops;
 
-       perf->sseu = props->sseu;
-       WRITE_ONCE(perf->exclusive_stream, stream);
+       stream->engine->gt->perf.sseu = props->sseu;
+       WRITE_ONCE(gt->perf.exclusive_stream, stream);
 
        ret = i915_perf_stream_enable_sync(stream);
        if (ret) {
@@ -2999,11 +3317,12 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream,
        stream->poll_check_timer.function = oa_poll_check_timer_cb;
        init_waitqueue_head(&stream->poll_wq);
        spin_lock_init(&stream->oa_buffer.ptr_lock);
+       mutex_init(&stream->lock);
 
        return 0;
 
 err_enable:
-       WRITE_ONCE(perf->exclusive_stream, NULL);
+       WRITE_ONCE(gt->perf.exclusive_stream, NULL);
        perf->ops.disable_metric_set(stream);
 
        free_oa_buffer(stream);
@@ -3033,7 +3352,7 @@ void i915_oa_init_reg_state(const struct intel_context *ce,
                return;
 
        /* perf.exclusive_stream serialised by lrc_configure_all_contexts() */
-       stream = READ_ONCE(engine->i915->perf.exclusive_stream);
+       stream = READ_ONCE(engine->gt->perf.exclusive_stream);
        if (stream && GRAPHICS_VER(stream->perf->i915) < 12)
                gen8_update_reg_state_unlocked(ce, stream);
 }
@@ -3062,7 +3381,6 @@ static ssize_t i915_perf_read(struct file *file,
                              loff_t *ppos)
 {
        struct i915_perf_stream *stream = file->private_data;
-       struct i915_perf *perf = stream->perf;
        size_t offset = 0;
        int ret;
 
@@ -3086,14 +3404,14 @@ static ssize_t i915_perf_read(struct file *file,
                        if (ret)
                                return ret;
 
-                       mutex_lock(&perf->lock);
+                       mutex_lock(&stream->lock);
                        ret = stream->ops->read(stream, buf, count, &offset);
-                       mutex_unlock(&perf->lock);
+                       mutex_unlock(&stream->lock);
                } while (!offset && !ret);
        } else {
-               mutex_lock(&perf->lock);
+               mutex_lock(&stream->lock);
                ret = stream->ops->read(stream, buf, count, &offset);
-               mutex_unlock(&perf->lock);
+               mutex_unlock(&stream->lock);
        }
 
        /* We allow the poll checking to sometimes report false positive EPOLLIN
@@ -3140,9 +3458,6 @@ static enum hrtimer_restart oa_poll_check_timer_cb(struct hrtimer *hrtimer)
  * &i915_perf_stream_ops->poll_wait to call poll_wait() with a wait queue that
  * will be woken for new stream data.
  *
- * Note: The &perf->lock mutex has been taken to serialize
- * with any non-file-operation driver hooks.
- *
  * Returns: any poll events that are ready without sleeping
  */
 static __poll_t i915_perf_poll_locked(struct i915_perf_stream *stream,
@@ -3181,12 +3496,11 @@ static __poll_t i915_perf_poll_locked(struct i915_perf_stream *stream,
 static __poll_t i915_perf_poll(struct file *file, poll_table *wait)
 {
        struct i915_perf_stream *stream = file->private_data;
-       struct i915_perf *perf = stream->perf;
        __poll_t ret;
 
-       mutex_lock(&perf->lock);
+       mutex_lock(&stream->lock);
        ret = i915_perf_poll_locked(stream, file, wait);
-       mutex_unlock(&perf->lock);
+       mutex_unlock(&stream->lock);
 
        return ret;
 }
@@ -3285,9 +3599,6 @@ static long i915_perf_config_locked(struct i915_perf_stream *stream,
  * @cmd: the ioctl request
  * @arg: the ioctl data
  *
- * Note: The &perf->lock mutex has been taken to serialize
- * with any non-file-operation driver hooks.
- *
  * Returns: zero on success or a negative error code. Returns -EINVAL for
  * an unknown ioctl request.
  */
@@ -3325,12 +3636,11 @@ static long i915_perf_ioctl(struct file *file,
                            unsigned long arg)
 {
        struct i915_perf_stream *stream = file->private_data;
-       struct i915_perf *perf = stream->perf;
        long ret;
 
-       mutex_lock(&perf->lock);
+       mutex_lock(&stream->lock);
        ret = i915_perf_ioctl_locked(stream, cmd, arg);
-       mutex_unlock(&perf->lock);
+       mutex_unlock(&stream->lock);
 
        return ret;
 }
@@ -3342,7 +3652,7 @@ static long i915_perf_ioctl(struct file *file,
  * Frees all resources associated with the given i915 perf @stream, disabling
  * any associated data capture in the process.
  *
- * Note: The &perf->lock mutex has been taken to serialize
+ * Note: The &gt->perf.lock mutex has been taken to serialize
  * with any non-file-operation driver hooks.
  */
 static void i915_perf_destroy_locked(struct i915_perf_stream *stream)
@@ -3374,10 +3684,16 @@ static int i915_perf_release(struct inode *inode, struct file *file)
 {
        struct i915_perf_stream *stream = file->private_data;
        struct i915_perf *perf = stream->perf;
+       struct intel_gt *gt = stream->engine->gt;
 
-       mutex_lock(&perf->lock);
+       /*
+        * Within this call, we know that the fd is being closed and we have no
+        * other user of stream->lock. Use the perf lock to destroy the stream
+        * here.
+        */
+       mutex_lock(&gt->perf.lock);
        i915_perf_destroy_locked(stream);
-       mutex_unlock(&perf->lock);
+       mutex_unlock(&gt->perf.lock);
 
        /* Release the reference the perf stream kept on the driver. */
        drm_dev_put(&perf->i915->drm);
@@ -3410,7 +3726,7 @@ static const struct file_operations fops = {
  * See i915_perf_ioctl_open() for interface details.
  *
  * Implements further stream config validation and stream initialization on
- * behalf of i915_perf_open_ioctl() with the &perf->lock mutex
+ * behalf of i915_perf_open_ioctl() with the &gt->perf.lock mutex
  * taken to serialize with any non-file-operation driver hooks.
  *
  * Note: at this point the @props have only been validated in isolation and
@@ -3565,8 +3881,10 @@ err:
 
 static u64 oa_exponent_to_ns(struct i915_perf *perf, int exponent)
 {
-       return intel_gt_clock_interval_to_ns(to_gt(perf->i915),
-                                            2ULL << exponent);
+       u64 nom = (2ULL << exponent) * NSEC_PER_SEC;
+       u32 den = i915_perf_oa_timestamp_frequency(perf->i915);
+
+       return div_u64(nom + den - 1, den);
 }
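For example, assuming a hypothetical 19.2 MHz OA timestamp frequency, an exponent of 5 gives (2 << 5) * 1e9 / 19.2e6 ≈ 3333.3 ns, which the round-up division above reports as 3334 ns per periodic sample.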
 
 static __always_inline bool
@@ -3794,7 +4112,7 @@ static int read_properties_unlocked(struct i915_perf *perf,
  * mutex to avoid an awkward lockdep with mmap_lock.
  *
  * Most of the implementation details are handled by
- * i915_perf_open_ioctl_locked() after taking the &perf->lock
+ * i915_perf_open_ioctl_locked() after taking the &gt->perf.lock
  * mutex for serializing with any non-file-operation driver hooks.
  *
  * Return: A newly opened i915 Perf stream file descriptor or negative
@@ -3805,6 +4123,7 @@ int i915_perf_open_ioctl(struct drm_device *dev, void *data,
 {
        struct i915_perf *perf = &to_i915(dev)->perf;
        struct drm_i915_perf_open_param *param = data;
+       struct intel_gt *gt;
        struct perf_open_properties props;
        u32 known_open_flags;
        int ret;
@@ -3831,9 +4150,11 @@ int i915_perf_open_ioctl(struct drm_device *dev, void *data,
        if (ret)
                return ret;
 
-       mutex_lock(&perf->lock);
+       gt = props.engine->gt;
+
+       mutex_lock(&gt->perf.lock);
        ret = i915_perf_open_ioctl_locked(perf, param, &props, file);
-       mutex_unlock(&perf->lock);
+       mutex_unlock(&gt->perf.lock);
 
        return ret;
 }
@@ -3849,6 +4170,7 @@ int i915_perf_open_ioctl(struct drm_device *dev, void *data,
 void i915_perf_register(struct drm_i915_private *i915)
 {
        struct i915_perf *perf = &i915->perf;
+       struct intel_gt *gt = to_gt(i915);
 
        if (!perf->i915)
                return;
@@ -3857,13 +4179,13 @@ void i915_perf_register(struct drm_i915_private *i915)
         * i915_perf_open_ioctl(); considering that we register after
         * being exposed to userspace.
         */
-       mutex_lock(&perf->lock);
+       mutex_lock(&gt->perf.lock);
 
        perf->metrics_kobj =
                kobject_create_and_add("metrics",
                                       &i915->drm.primary->kdev->kobj);
 
-       mutex_unlock(&perf->lock);
+       mutex_unlock(&gt->perf.lock);
 }
 
 /**
@@ -3939,6 +4261,11 @@ static const struct i915_range gen12_oa_b_counters[] = {
        {}
 };
 
+static const struct i915_range xehp_oa_b_counters[] = {
+       { .start = 0xdc48, .end = 0xdc48 },     /* OAA_ENABLE_REG */
+       { .start = 0xdd00, .end = 0xdd48 },     /* OAG_LCE0_0 - OAA_LENABLE_REG */
+       {}
+};
+
 static const struct i915_range gen7_oa_mux_regs[] = {
        { .start = 0x91b8, .end = 0x91cc },     /* OA_PERFCNT[1-2], OA_PERFMATRIX */
        { .start = 0x9800, .end = 0x9888 },     /* MICRO_BP0_0 - NOA_WRITE */
@@ -4013,6 +4340,12 @@ static bool gen12_is_valid_b_counter_addr(struct i915_perf *perf, u32 addr)
        return reg_in_range_table(addr, gen12_oa_b_counters);
 }
 
+static bool xehp_is_valid_b_counter_addr(struct i915_perf *perf, u32 addr)
+{
+       return reg_in_range_table(addr, xehp_oa_b_counters) ||
+               reg_in_range_table(addr, gen12_oa_b_counters);
+}
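reg_in_range_table() is defined earlier in i915_perf.c, outside this hunk; it is assumed to walk the table until it hits a zeroed entry, which is why xehp_oa_b_counters above needs the terminating {} just like gen12_oa_b_counters. A sketch of that assumed shape, for illustration only:

static bool example_in_range_table(u32 addr, const struct i915_range *table)
{
	/* Stop at the all-zero sentinel entry. */
	while (table->start || table->end) {
		if (addr >= table->start && addr <= table->end)
			return true;
		table++;
	}

	return false;
}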
+
 static bool gen12_is_valid_mux_addr(struct i915_perf *perf, u32 addr)
 {
        return reg_in_range_table(addr, gen12_oa_mux_regs);
@@ -4411,11 +4744,47 @@ static void oa_init_supported_formats(struct i915_perf *perf)
                oa_format_add(perf, I915_OA_FORMAT_C4_B8);
                break;
 
+       case INTEL_DG2:
+               oa_format_add(perf, I915_OAR_FORMAT_A32u40_A4u32_B8_C8);
+               oa_format_add(perf, I915_OA_FORMAT_A24u40_A14u32_B8_C8);
+               break;
+
        default:
                MISSING_CASE(platform);
        }
 }
 
+static void i915_perf_init_info(struct drm_i915_private *i915)
+{
+       struct i915_perf *perf = &i915->perf;
+
+       switch (GRAPHICS_VER(i915)) {
+       case 8:
+               perf->ctx_oactxctrl_offset = 0x120;
+               perf->ctx_flexeu0_offset = 0x2ce;
+               perf->gen8_valid_ctx_bit = BIT(25);
+               break;
+       case 9:
+               perf->ctx_oactxctrl_offset = 0x128;
+               perf->ctx_flexeu0_offset = 0x3de;
+               perf->gen8_valid_ctx_bit = BIT(16);
+               break;
+       case 11:
+               perf->ctx_oactxctrl_offset = 0x124;
+               perf->ctx_flexeu0_offset = 0x78e;
+               perf->gen8_valid_ctx_bit = BIT(16);
+               break;
+       case 12:
+               /*
+                * Calculate offset at runtime in oa_pin_context for gen12 and
+                * cache the value in perf->ctx_oactxctrl_offset.
+                */
+               break;
+       default:
+               MISSING_CASE(GRAPHICS_VER(i915));
+       }
+}
+
 /**
  * i915_perf_init - initialize i915-perf state on module bind
  * @i915: i915 device instance
@@ -4429,12 +4798,6 @@ void i915_perf_init(struct drm_i915_private *i915)
 {
        struct i915_perf *perf = &i915->perf;
 
-       /* XXX const struct i915_perf_ops! */
-
-       /* i915_perf is not enabled for DG2 yet */
-       if (IS_DG2(i915))
-               return;
-
        perf->oa_formats = oa_formats;
        if (IS_HASWELL(i915)) {
                perf->ops.is_valid_b_counter_reg = gen7_is_valid_b_counter_addr;
@@ -4454,6 +4817,7 @@ void i915_perf_init(struct drm_i915_private *i915)
                 * execlist mode by default.
                 */
                perf->ops.read = gen8_oa_read;
+               i915_perf_init_info(i915);
 
                if (IS_GRAPHICS_VER(i915, 8, 9)) {
                        perf->ops.is_valid_b_counter_reg =
@@ -4473,18 +4837,6 @@ void i915_perf_init(struct drm_i915_private *i915)
                        perf->ops.enable_metric_set = gen8_enable_metric_set;
                        perf->ops.disable_metric_set = gen8_disable_metric_set;
                        perf->ops.oa_hw_tail_read = gen8_oa_hw_tail_read;
-
-                       if (GRAPHICS_VER(i915) == 8) {
-                               perf->ctx_oactxctrl_offset = 0x120;
-                               perf->ctx_flexeu0_offset = 0x2ce;
-
-                               perf->gen8_valid_ctx_bit = BIT(25);
-                       } else {
-                               perf->ctx_oactxctrl_offset = 0x128;
-                               perf->ctx_flexeu0_offset = 0x3de;
-
-                               perf->gen8_valid_ctx_bit = BIT(16);
-                       }
                } else if (GRAPHICS_VER(i915) == 11) {
                        perf->ops.is_valid_b_counter_reg =
                                gen7_is_valid_b_counter_addr;
@@ -4498,13 +4850,10 @@ void i915_perf_init(struct drm_i915_private *i915)
                        perf->ops.enable_metric_set = gen8_enable_metric_set;
                        perf->ops.disable_metric_set = gen11_disable_metric_set;
                        perf->ops.oa_hw_tail_read = gen8_oa_hw_tail_read;
-
-                       perf->ctx_oactxctrl_offset = 0x124;
-                       perf->ctx_flexeu0_offset = 0x78e;
-
-                       perf->gen8_valid_ctx_bit = BIT(16);
                } else if (GRAPHICS_VER(i915) == 12) {
                        perf->ops.is_valid_b_counter_reg =
+                               HAS_OA_SLICE_CONTRIB_LIMITS(i915) ?
+                               xehp_is_valid_b_counter_addr :
                                gen12_is_valid_b_counter_addr;
                        perf->ops.is_valid_mux_reg =
                                gen12_is_valid_mux_addr;
@@ -4516,14 +4865,15 @@ void i915_perf_init(struct drm_i915_private *i915)
                        perf->ops.enable_metric_set = gen12_enable_metric_set;
                        perf->ops.disable_metric_set = gen12_disable_metric_set;
                        perf->ops.oa_hw_tail_read = gen12_oa_hw_tail_read;
-
-                       perf->ctx_flexeu0_offset = 0;
-                       perf->ctx_oactxctrl_offset = 0x144;
                }
        }
 
        if (perf->ops.enable_metric_set) {
-               mutex_init(&perf->lock);
+               struct intel_gt *gt;
+               int i;
+
+               for_each_gt(gt, i915, i)
+                       mutex_init(&gt->perf.lock);
 
                /* Choose a representative limit */
                oa_sample_rate_hard_limit = to_gt(i915)->clock_frequency / 2;
index 1d1329e5af3ae2cd40193158c065cec11cdd8852..f96e09a4af04223c542827c2c60f10a5edc41a4b 100644 (file)
@@ -57,4 +57,6 @@ static inline void i915_oa_config_put(struct i915_oa_config *oa_config)
        kref_put(&oa_config->ref, i915_oa_config_release);
 }
 
+u32 i915_perf_oa_timestamp_frequency(struct drm_i915_private *i915);
+
 #endif /* __I915_PERF_H__ */
index f31c9f13a9fc156656b1944054a4dbd8f9d1faf5..381d94101610a3d1975738b229e7ca3881712788 100644 (file)
@@ -97,7 +97,7 @@
 #define  GEN12_OAR_OACONTROL_COUNTER_FORMAT_SHIFT 1
 #define  GEN12_OAR_OACONTROL_COUNTER_ENABLE       (1 << 0)
 
-#define GEN12_OACTXCONTROL _MMIO(0x2360)
+#define GEN12_OACTXCONTROL(base) _MMIO((base) + 0x360)
 #define GEN12_OAR_OASTATUS _MMIO(0x2968)
 
 /* Gen12 OAG unit */
 #define GDT_CHICKEN_BITS    _MMIO(0x9840)
 #define   GT_NOA_ENABLE            0x00000080
 
+#define GEN12_SQCNT1                           _MMIO(0x8718)
+#define   GEN12_SQCNT1_PMON_ENABLE             REG_BIT(30)
+#define   GEN12_SQCNT1_OABPC                   REG_BIT(29)
+
 #endif /* __INTEL_PERF_OA_REGS__ */
index 05cb9a335a971e230a44be77607cc1d74c00db5f..e0c96b44eda8e05e7af81bf757d58f566ef7b8b5 100644 (file)
@@ -146,6 +146,11 @@ struct i915_perf_stream {
         */
        struct intel_engine_cs *engine;
 
+       /*
+        * Lock associated with operations on stream
+        */
+       struct mutex lock;
+
        /**
         * @sample_flags: Flags representing the `DRM_I915_PERF_PROP_SAMPLE_*`
         * properties given when opening a stream, representing the contents
@@ -245,11 +250,10 @@ struct i915_perf_stream {
         * @oa_buffer: State of the OA buffer.
         */
        struct {
+               const struct i915_oa_format *format;
                struct i915_vma *vma;
                u8 *vaddr;
                u32 last_ctx_id;
-               int format;
-               int format_size;
                int size_exponent;
 
                /**
@@ -380,6 +384,26 @@ struct i915_oa_ops {
        u32 (*oa_hw_tail_read)(struct i915_perf_stream *stream);
 };
 
+struct i915_perf_gt {
+       /*
+        * Lock associated with anything below within this structure.
+        */
+       struct mutex lock;
+
+       /**
+        * @sseu: sseu configuration selected to run while perf is active,
+        * applies to all contexts.
+        */
+       struct intel_sseu sseu;
+
+       /*
+        * @exclusive_stream: The stream currently using the OA unit. This is
+        * sometimes accessed outside a syscall associated with its file
+        * descriptor.
+        */
+       struct i915_perf_stream *exclusive_stream;
+};
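With the per-GT split, anything under struct i915_perf_gt is serialized by gt->perf.lock rather than the old device-wide perf->lock. A minimal sketch of the intended access pattern (hypothetical helper, not from the patch):

static bool example_oa_unit_busy(struct intel_gt *gt)
{
	bool busy;

	mutex_lock(&gt->perf.lock);
	busy = gt->perf.exclusive_stream != NULL;
	mutex_unlock(&gt->perf.lock);

	return busy;
}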
+
 struct i915_perf {
        struct drm_i915_private *i915;
 
@@ -397,25 +421,6 @@ struct i915_perf {
         */
        struct idr metrics_idr;
 
-       /*
-        * Lock associated with anything below within this structure
-        * except exclusive_stream.
-        */
-       struct mutex lock;
-
-       /*
-        * The stream currently using the OA unit. If accessed
-        * outside a syscall associated to its file
-        * descriptor.
-        */
-       struct i915_perf_stream *exclusive_stream;
-
-       /**
-        * @sseu: sseu configuration selected to run while perf is active,
-        * applies to all contexts.
-        */
-       struct intel_sseu sseu;
-
        /**
         * For rate limiting any notifications of spurious
         * invalid OA reports
index 2a887cdd7c1b29079d9b66e47bf86947a80a603b..1c0da50c0dc73589714047c47aab60343295f3f1 100644 (file)
 #define XEHPSDV_RP_STATE_CAP   _MMIO(0x250014)
 #define PVC_RP_STATE_CAP       _MMIO(0x281014)
 
+#define MTL_RP_STATE_CAP       _MMIO(0x138000)
+#define MTL_MEDIAP_STATE_CAP   _MMIO(0x138020)
+#define   MTL_RP0_CAP_MASK     REG_GENMASK(8, 0)
+#define   MTL_RPN_CAP_MASK     REG_GENMASK(24, 16)
+
+#define MTL_GT_RPE_FREQUENCY   _MMIO(0x13800c)
+#define MTL_MPE_FREQUENCY      _MMIO(0x13802c)
+#define   MTL_RPE_MASK         REG_GENMASK(8, 0)
+
 #define GT0_PERF_LIMIT_REASONS         _MMIO(0x1381a8)
 #define   GT0_PERF_LIMIT_REASONS_MASK  0xde3
 #define   PROCHOT_MASK                 REG_BIT(0)
 #define   POWER_LIMIT_4_MASK           REG_BIT(8)
 #define   POWER_LIMIT_1_MASK           REG_BIT(10)
 #define   POWER_LIMIT_2_MASK           REG_BIT(11)
+#define   GT0_PERF_LIMIT_REASONS_LOG_MASK REG_GENMASK(31, 16)
+#define MTL_MEDIA_PERF_LIMIT_REASONS   _MMIO(0x138030)
 
 #define CHV_CLK_CTL1                   _MMIO(0x101100)
 #define VLV_CLK_CTL2                   _MMIO(0x101104)
 #define   DG1_PCODE_STATUS                     0x7E
 #define     DG1_UNCORE_GET_INIT_STATUS         0x0
 #define     DG1_UNCORE_INIT_STATUS_COMPLETE    0x1
+#define   PCODE_POWER_SETUP                    0x7C
+#define     POWER_SETUP_SUBCOMMAND_READ_I1     0x4
+#define     POWER_SETUP_SUBCOMMAND_WRITE_I1    0x5
+#define            POWER_SETUP_I1_WATTS                REG_BIT(31)
+#define            POWER_SETUP_I1_SHIFT                6       /* 10.6 fixed point format */
+#define            POWER_SETUP_I1_DATA_MASK            REG_GENMASK(15, 0)
 #define GEN12_PCODE_READ_SAGV_BLOCK_TIME_US    0x23
 #define   XEHP_PCODE_FREQUENCY_CONFIG          0x6e    /* xehpsdv, pvc */
 /* XEHP_PCODE_FREQUENCY_CONFIG sub-commands (param1) */
@@ -7788,8 +7805,13 @@ enum skl_power_gate {
                                                           _ICL_PIPE_DSS_CTL2_PB, \
                                                           _ICL_PIPE_DSS_CTL2_PC)
 
+#define GGC                            _MMIO(0x108040)
+#define   GMS_MASK                     REG_GENMASK(15, 8)
+#define   GGMS_MASK                    REG_GENMASK(7, 6)
+
 #define GEN12_GSMBASE                  _MMIO(0x108100)
 #define GEN12_DSMBASE                  _MMIO(0x1080C0)
+#define   GEN12_BDSM_MASK              REG_GENMASK64(63, 20)
 
 #define XEHP_CLOCK_GATE_DIS            _MMIO(0x101014)
 #define   SGSI_SIDECLK_DIS             REG_BIT(17)
index 8f486f77609f16a3769aa9295ee41f792112bc37..f1859046a9c4842b491df5fa8c2020ae529525ae 100644 (file)
@@ -104,22 +104,21 @@ typedef struct {
 
 #define _MMIO(r) ((const i915_reg_t){ .reg = (r) })
 
-#define INVALID_MMIO_REG _MMIO(0)
-
-static __always_inline u32 i915_mmio_reg_offset(i915_reg_t reg)
-{
-       return reg.reg;
-}
+typedef struct {
+       u32 reg;
+} i915_mcr_reg_t;
 
-static inline bool i915_mmio_reg_equal(i915_reg_t a, i915_reg_t b)
-{
-       return i915_mmio_reg_offset(a) == i915_mmio_reg_offset(b);
-}
+#define INVALID_MMIO_REG _MMIO(0)
 
-static inline bool i915_mmio_reg_valid(i915_reg_t reg)
-{
-       return !i915_mmio_reg_equal(reg, INVALID_MMIO_REG);
-}
+/*
+ * These macros can be used on either i915_reg_t or i915_mcr_reg_t since they're
+ * simply operations on the register's offset and don't care about the MCR vs
+ * non-MCR nature of the register.
+ */
+#define i915_mmio_reg_offset(r) \
+       _Generic((r), i915_reg_t: (r).reg, i915_mcr_reg_t: (r).reg)
+#define i915_mmio_reg_equal(a, b) (i915_mmio_reg_offset(a) == i915_mmio_reg_offset(b))
+#define i915_mmio_reg_valid(r) (!i915_mmio_reg_equal(r, INVALID_MMIO_REG))
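The _Generic-based macros above accept both register types, so helpers no longer need separate MCR variants just to work on offsets. A small sketch, assuming it is acceptable to mix the two types at a call site; the function is illustrative only:

static inline bool example_same_offset(i915_reg_t a, i915_mcr_reg_t b)
{
	/* Both arguments expand via _Generic to their underlying .reg offset. */
	return i915_mmio_reg_offset(a) == i915_mmio_reg_offset(b);
}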
 
 #define VLV_DISPLAY_BASE               0x180000
 
index 62fad16a55e84f15ba3784a298e1155349f64297..f949a9495758a0c50b4cb402c3880c9f558f0a03 100644 (file)
@@ -1621,6 +1621,20 @@ i915_request_await_object(struct i915_request *to,
        return ret;
 }
 
+static void i915_request_await_huc(struct i915_request *rq)
+{
+       struct intel_huc *huc = &rq->context->engine->gt->uc.huc;
+
+       /* don't stall kernel submissions! */
+       if (!rcu_access_pointer(rq->context->gem_context))
+               return;
+
+       if (intel_huc_wait_required(huc))
+               i915_sw_fence_await_sw_fence(&rq->submit,
+                                            &huc->delayed_load.fence,
+                                            &rq->hucq);
+}
+
 static struct i915_request *
 __i915_request_ensure_parallel_ordering(struct i915_request *rq,
                                        struct intel_timeline *timeline)
@@ -1702,6 +1716,16 @@ __i915_request_add_to_timeline(struct i915_request *rq)
        struct intel_timeline *timeline = i915_request_timeline(rq);
        struct i915_request *prev;
 
+       /*
+        * Media workloads may require HuC, so stall them until HuC loading is
+        * complete. Note that HuC not being loaded when a user submission
+        * arrives can only happen when HuC is loaded via GSC and in that case
+        * we still expect the window between us starting to accept submissions
+        * and HuC loading completion to be small (a few hundred ms).
+        */
+       if (rq->engine->class == VIDEO_DECODE_CLASS)
+               i915_request_await_huc(rq);
+
        /*
         * Dependency tracking and request ordering along the timeline
         * is special cased so that we can eliminate redundant ordering
index 47041ec68df8982eb9be0d0269cb77b8ac44dfd7..f5e1bb5e857aa0be1bc7a04a01c0877a1dab4c5e 100644 (file)
@@ -348,6 +348,11 @@ struct i915_request {
 #define        GUC_PRIO_FINI   0xfe
        u8 guc_prio;
 
+       /**
+        * @hucq: wait queue entry used to wait on the HuC load to complete
+        */
+       wait_queue_entry_t hucq;
+
        I915_SELFTEST_DECLARE(struct {
                struct list_head link;
                unsigned long delay;
index 9ddb3e743a3e517dfe5d4006c822d396cef9492d..b0a1db44f89504a43ebb48c28ca58bc70bb0214d 100644 (file)
@@ -9,7 +9,8 @@
 
 #include <linux/pfn.h>
 #include <linux/scatterlist.h>
-#include <linux/swiotlb.h>
+#include <linux/dma-mapping.h>
+#include <xen/xen.h>
 
 #include "i915_gem.h"
 
@@ -127,19 +128,26 @@ static inline unsigned int i915_sg_dma_sizes(struct scatterlist *sg)
        return page_sizes;
 }
 
-static inline unsigned int i915_sg_segment_size(void)
+static inline unsigned int i915_sg_segment_size(struct device *dev)
 {
-       unsigned int size = swiotlb_max_segment();
-
-       if (size == 0)
-               size = UINT_MAX;
-
-       size = rounddown(size, PAGE_SIZE);
-       /* swiotlb_max_segment_size can return 1 byte when it means one page. */
-       if (size < PAGE_SIZE)
-               size = PAGE_SIZE;
-
-       return size;
+       size_t max = min_t(size_t, UINT_MAX, dma_max_mapping_size(dev));
+
+       /*
+        * For Xen PV guests, pages aren't contiguous in DMA (machine) address
+        * space.  The DMA API takes care of that both in dma_alloc_* (by
+        * calling into the hypervisor to make the pages contiguous) and in
+        * dma_map_* (by bounce buffering).  But i915 ignores the
+        * coherency aspects of the DMA API and thus can't cope with bounce
+        * buffering actually happening, so add a hack here to force small
+        * allocations and mappings when running in PV mode on Xen.
+        *
+        * Note this will still break if bounce buffering is required for other
+        * reasons, like confidential computing hypervisors or PCIe root ports
+        * with addressing limitations.
+        */
+       if (xen_pv_domain())
+               max = PAGE_SIZE;
+       return round_down(max, PAGE_SIZE);
 }
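Callers are now expected to pass the device that will perform the DMA mapping, so the limit reflects dma_max_mapping_size() rather than the removed swiotlb query. A hedged usage sketch; the wrapper name is hypothetical and the choice of i915->drm.dev is an assumption, not taken from this hunk:

static unsigned int example_max_segment(struct drm_i915_private *i915)
{
	/* Bound scatterlist segment size by what the device can map in one go. */
	return i915_sg_segment_size(i915->drm.dev);
}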
 
 bool i915_sg_trim(struct sg_table *orig_st);
index f54de0499be72c978a9e0e84066f6c0456d19139..bdf3e22c0a34d96253eb2c8741b8b4ed9e3d7113 100644 (file)
@@ -92,12 +92,14 @@ int __i915_subtests(const char *caller,
                        T, ARRAY_SIZE(T), data)
 #define i915_live_subtests(T, data) ({ \
        typecheck(struct drm_i915_private *, data); \
+       (data)->gt[0]->uc.guc.submission_state.sched_disable_delay_ms = 0; \
        __i915_subtests(__func__, \
                        __i915_live_setup, __i915_live_teardown, \
                        T, ARRAY_SIZE(T), data); \
 })
 #define intel_gt_live_subtests(T, data) ({ \
        typecheck(struct intel_gt *, data); \
+       (data)->uc.guc.submission_state.sched_disable_delay_ms = 0; \
        __i915_subtests(__func__, \
                        __intel_gt_live_setup, __intel_gt_live_teardown, \
                        T, ARRAY_SIZE(T), data); \
index 37b5c9e9d260e8a85eeb78422438f3404e44e5f0..c70a02517e02119e6effaa8a71453b02f60af47b 100644 (file)
@@ -671,21 +671,6 @@ TRACE_EVENT_CONDITION(i915_reg_rw,
                (u32)(__entry->val >> 32))
 );
 
-TRACE_EVENT(intel_gpu_freq_change,
-           TP_PROTO(u32 freq),
-           TP_ARGS(freq),
-
-           TP_STRUCT__entry(
-                            __field(u32, freq)
-                            ),
-
-           TP_fast_assign(
-                          __entry->freq = freq;
-                          ),
-
-           TP_printk("new_freq=%u", __entry->freq)
-);
-
 /**
  * DOC: i915_ppgtt_create and i915_ppgtt_release tracepoints
  *
index f17c09ead7d778752c332c267c9da41b07d8dc22..c39488eb9eeba069c5deaf46ad0b9463d6bb4d87 100644 (file)
@@ -776,12 +776,6 @@ i915_vma_insert(struct i915_vma *vma, struct i915_gem_ww_ctx *ww,
        GEM_BUG_ON(!IS_ALIGNED(end, I915_GTT_PAGE_SIZE));
 
        alignment = max(alignment, i915_vm_obj_min_alignment(vma->vm, vma->obj));
-       /*
-        * for compact-pt we round up the reservation to prevent
-        * any smaller pages being used within the same PDE
-        */
-       if (NEEDS_COMPACT_PT(vma->vm->i915))
-               size = round_up(size, alignment);
 
        /* If binding the object/GGTT view requires more space than the entire
         * aperture has, reject it early before evicting everything in a vain
@@ -820,7 +814,8 @@ i915_vma_insert(struct i915_vma *vma, struct i915_gem_ww_ctx *ww,
                 * forseeable future. See also i915_ggtt_offset().
                 */
                if (upper_32_bits(end - 1) &&
-                   vma->page_sizes.sg > I915_GTT_PAGE_SIZE) {
+                   vma->page_sizes.sg > I915_GTT_PAGE_SIZE &&
+                   !HAS_64K_PAGES(vma->vm->i915)) {
                        /*
                         * We can't mix 64K and 4K PTEs in the same page-table
                         * (2M block), and so to avoid the ugliness and
index 7b5dd8e21d7af010dab3ee6a2d323417cd5f2737..d588e5fd2eea953d1cc69202c8bb07088e02d7b7 100644 (file)
@@ -146,7 +146,6 @@ enum intel_ppgtt_type {
        /* Keep has_* in alphabetical order */ \
        func(has_64bit_reloc); \
        func(has_64k_pages); \
-       func(needs_compact_pt); \
        func(gpu_reset_clobbers_display); \
        func(has_reset_engine); \
        func(has_3d_pipeline); \
@@ -165,6 +164,8 @@ enum intel_ppgtt_type {
        func(has_logical_ring_elsq); \
        func(has_media_ratio_mode); \
        func(has_mslice_steering); \
+       func(has_oa_bpc_reporting); \
+       func(has_oa_slice_contrib_limits); \
        func(has_one_eu_per_fuse_bit); \
        func(has_pxp); \
        func(has_rc6); \
index 8279dc580a3e507bb9aa4a2333df74470a76514c..638b77d64bf43e0f32731aa230a51f96ffd87d6a 100644 (file)
@@ -102,7 +102,7 @@ static int iterate_generic_mmio(struct intel_gvt_mmio_table_iter *iter)
        MMIO_D(_MMIO(0x2438));
        MMIO_D(_MMIO(0x243c));
        MMIO_D(_MMIO(0x7018));
-       MMIO_D(HALF_SLICE_CHICKEN3);
+       MMIO_D(HSW_HALF_SLICE_CHICKEN3);
        MMIO_D(GEN7_HALF_SLICE_CHICKEN1);
        /* display */
        MMIO_F(_MMIO(0x60220), 0x20);
index ffc702b79579ed2f2c5b5c26643f371b91f9c945..f93e9af43ac357229c16be4d155d957fb28d552e 100644 (file)
 #define  DG1_QCLK_RATIO_MASK                   REG_GENMASK(9, 2)
 #define  DG1_QCLK_REFERENCE                    REG_BIT(10)
 
+/*
+ * *_PACKAGE_POWER_SKU - SKU power and timing parameters.
+ */
+#define PCU_PACKAGE_POWER_SKU                  _MMIO(MCHBAR_MIRROR_BASE_SNB + 0x5930)
+#define   PKG_PKG_TDP                          GENMASK_ULL(14, 0)
+#define   PKG_MAX_WIN                          GENMASK_ULL(54, 48)
+#define     PKG_MAX_WIN_X                      GENMASK_ULL(54, 53)
+#define     PKG_MAX_WIN_Y                      GENMASK_ULL(52, 48)
+
+#define PCU_PACKAGE_POWER_SKU_UNIT             _MMIO(MCHBAR_MIRROR_BASE_SNB + 0x5938)
+#define   PKG_PWR_UNIT                         REG_GENMASK(3, 0)
+#define   PKG_ENERGY_UNIT                      REG_GENMASK(12, 8)
+#define   PKG_TIME_UNIT                                REG_GENMASK(19, 16)
+#define PCU_PACKAGE_ENERGY_STATUS              _MMIO(MCHBAR_MIRROR_BASE_SNB + 0x593c)
+
 #define GEN6_GT_PERF_STATUS                    _MMIO(MCHBAR_MIRROR_BASE_SNB + 0x5948)
 #define GEN6_RP_STATE_LIMITS                   _MMIO(MCHBAR_MIRROR_BASE_SNB + 0x5994)
 #define GEN6_RP_STATE_CAP                      _MMIO(MCHBAR_MIRROR_BASE_SNB + 0x5998)
 
 #define GEN10_FREQ_INFO_REC                    _MMIO(MCHBAR_MIRROR_BASE_SNB + 0x5ef0)
 #define   RPE_MASK                             REG_GENMASK(15, 8)
+#define PCU_PACKAGE_RAPL_LIMIT                 _MMIO(MCHBAR_MIRROR_BASE_SNB + 0x59a0)
+#define   PKG_PWR_LIM_1                                REG_GENMASK(14, 0)
+#define   PKG_PWR_LIM_1_EN                     REG_BIT(15)
+#define   PKG_PWR_LIM_1_TIME                   REG_GENMASK(23, 17)
+#define   PKG_PWR_LIM_1_TIME_X                 REG_GENMASK(23, 22)
+#define   PKG_PWR_LIM_1_TIME_Y                 REG_GENMASK(21, 17)
 
 /* snb MCH registers for priority tuning */
 #define MCH_SSKPD                              _MMIO(MCHBAR_MIRROR_BASE_SNB + 0x5d10)
index 4977a524ce6f3f458704360a6dbeebe4265ece03..23b8e519f333cfbbf6f7a08407fe02e01432c156 100644 (file)
@@ -7,11 +7,29 @@
 #define __INTEL_PCI_CONFIG_H__
 
 /* PCI BARs */
-#define GTTMMADR_BAR                           0
-#define GEN2_GTTMMADR_BAR                      1
-#define GFXMEM_BAR                             2
-#define GTT_APERTURE_BAR                       GFXMEM_BAR
-#define GEN12_LMEM_BAR                         GFXMEM_BAR
+#define GEN2_GMADR_BAR                         0
+#define GEN2_MMADR_BAR                         1 /* MMIO+GTT, despite the name */
+#define GEN2_IO_BAR                            2 /* 85x/865 */
+
+#define GEN3_MMADR_BAR                         0 /* MMIO only */
+#define GEN3_IO_BAR                            1
+#define GEN3_GMADR_BAR                         2
+#define GEN3_GTTADR_BAR                                3 /* GTT only */
+
+#define GEN4_GTTMMADR_BAR                      0 /* MMIO+GTT */
+#define GEN4_GMADR_BAR                         2
+#define GEN4_IO_BAR                            4
+
+#define GEN12_LMEM_BAR                         2
+
+static inline int intel_mmio_bar(int graphics_ver)
+{
+       switch (graphics_ver) {
+       case 2: return GEN2_MMADR_BAR;
+       case 3: return GEN3_MMADR_BAR;
+       default: return GEN4_GTTMMADR_BAR;
+       }
+}
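A hedged usage sketch of the new helper, resolving the MMIO BAR for the running platform before mapping registers; the wrapper name is illustrative and not part of the patch:

static resource_size_t example_mmio_base(struct pci_dev *pdev, int graphics_ver)
{
	/* Gen2 uses BAR 1 for MMIO; gen3 and later use BAR 0. */
	return pci_resource_start(pdev, intel_mmio_bar(graphics_ver));
}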
 
 /* BSM in include/drm/i915_drm.h */
 
index 19d4a88184d7a1b74c1a728afcfc3dead60f4e2a..ee34e278563603ddb08141aa87a59ed3550e1f05 100644 (file)
@@ -30,6 +30,8 @@
 #include "display/skl_watermark.h"
 
 #include "gt/intel_engine_regs.h"
+#include "gt/intel_gt.h"
+#include "gt/intel_gt_mcr.h"
 #include "gt/intel_gt_regs.h"
 
 #include "i915_drv.h"
@@ -58,25 +60,20 @@ static void gen9_init_clock_gating(struct drm_i915_private *dev_priv)
                 * Must match Sampler, Pixel Back End, and Media. See
                 * WaCompressedResourceSamplerPbeMediaNewHashMode.
                 */
-               intel_uncore_write(&dev_priv->uncore, CHICKEN_PAR1_1,
-                          intel_uncore_read(&dev_priv->uncore, CHICKEN_PAR1_1) |
-                          SKL_DE_COMPRESSED_HASH_MODE);
+               intel_uncore_rmw(&dev_priv->uncore, CHICKEN_PAR1_1, 0, SKL_DE_COMPRESSED_HASH_MODE);
        }
 
        /* See Bspec note for PSR2_CTL bit 31, Wa#828:skl,bxt,kbl,cfl */
-       intel_uncore_write(&dev_priv->uncore, CHICKEN_PAR1_1,
-                  intel_uncore_read(&dev_priv->uncore, CHICKEN_PAR1_1) | SKL_EDP_PSR_FIX_RDWRAP);
+       intel_uncore_rmw(&dev_priv->uncore, CHICKEN_PAR1_1, 0, SKL_EDP_PSR_FIX_RDWRAP);
 
        /* WaEnableChickenDCPR:skl,bxt,kbl,glk,cfl */
-       intel_uncore_write(&dev_priv->uncore, GEN8_CHICKEN_DCPR_1,
-                  intel_uncore_read(&dev_priv->uncore, GEN8_CHICKEN_DCPR_1) | MASK_WAKEMEM);
+       intel_uncore_rmw(&dev_priv->uncore, GEN8_CHICKEN_DCPR_1, 0, MASK_WAKEMEM);
 
        /*
         * WaFbcWakeMemOn:skl,bxt,kbl,glk,cfl
         * Display WA #0859: skl,bxt,kbl,glk,cfl
         */
-       intel_uncore_write(&dev_priv->uncore, DISP_ARB_CTL, intel_uncore_read(&dev_priv->uncore, DISP_ARB_CTL) |
-                  DISP_FBC_MEMORY_WAKE);
+       intel_uncore_rmw(&dev_priv->uncore, DISP_ARB_CTL, 0, DISP_FBC_MEMORY_WAKE);
 }
 
 static void bxt_init_clock_gating(struct drm_i915_private *dev_priv)
@@ -84,15 +81,13 @@ static void bxt_init_clock_gating(struct drm_i915_private *dev_priv)
        gen9_init_clock_gating(dev_priv);
 
        /* WaDisableSDEUnitClockGating:bxt */
-       intel_uncore_write(&dev_priv->uncore, GEN8_UCGCTL6, intel_uncore_read(&dev_priv->uncore, GEN8_UCGCTL6) |
-                  GEN8_SDEUNIT_CLOCK_GATE_DISABLE);
+       intel_uncore_rmw(&dev_priv->uncore, GEN8_UCGCTL6, 0, GEN8_SDEUNIT_CLOCK_GATE_DISABLE);
 
        /*
         * FIXME:
         * GEN8_HDCUNIT_CLOCK_GATE_DISABLE_HDCREQ applies on 3x6 GT SKUs only.
         */
-       intel_uncore_write(&dev_priv->uncore, GEN8_UCGCTL6, intel_uncore_read(&dev_priv->uncore, GEN8_UCGCTL6) |
-                  GEN8_HDCUNIT_CLOCK_GATE_DISABLE_HDCREQ);
+       intel_uncore_rmw(&dev_priv->uncore, GEN8_UCGCTL6, 0, GEN8_HDCUNIT_CLOCK_GATE_DISABLE_HDCREQ);
 
        /*
         * Wa: Backlight PWM may stop in the asserted state, causing backlight
@@ -113,16 +108,13 @@ static void bxt_init_clock_gating(struct drm_i915_private *dev_priv)
         * WaFbcTurnOffFbcWatermark:bxt
         * Display WA #0562: bxt
         */
-       intel_uncore_write(&dev_priv->uncore, DISP_ARB_CTL, intel_uncore_read(&dev_priv->uncore, DISP_ARB_CTL) |
-                  DISP_FBC_WM_DIS);
+       intel_uncore_rmw(&dev_priv->uncore, DISP_ARB_CTL, 0, DISP_FBC_WM_DIS);
 
        /*
         * WaFbcHighMemBwCorruptionAvoidance:bxt
         * Display WA #0883: bxt
         */
-       intel_uncore_write(&dev_priv->uncore, ILK_DPFC_CHICKEN(INTEL_FBC_A),
-                          intel_uncore_read(&dev_priv->uncore, ILK_DPFC_CHICKEN(INTEL_FBC_A)) |
-                          DPFC_DISABLE_DUMMY0);
+       intel_uncore_rmw(&dev_priv->uncore, ILK_DPFC_CHICKEN(INTEL_FBC_A), 0, DPFC_DISABLE_DUMMY0);
 }
 
 static void glk_init_clock_gating(struct drm_i915_private *dev_priv)
@@ -4053,9 +4045,9 @@ void vlv_wm_sanitize(struct drm_i915_private *dev_priv)
  */
 static void ilk_init_lp_watermarks(struct drm_i915_private *dev_priv)
 {
-       intel_uncore_write(&dev_priv->uncore, WM3_LP_ILK, intel_uncore_read(&dev_priv->uncore, WM3_LP_ILK) & ~WM_LP_ENABLE);
-       intel_uncore_write(&dev_priv->uncore, WM2_LP_ILK, intel_uncore_read(&dev_priv->uncore, WM2_LP_ILK) & ~WM_LP_ENABLE);
-       intel_uncore_write(&dev_priv->uncore, WM1_LP_ILK, intel_uncore_read(&dev_priv->uncore, WM1_LP_ILK) & ~WM_LP_ENABLE);
+       intel_uncore_rmw(&dev_priv->uncore, WM3_LP_ILK, WM_LP_ENABLE, 0);
+       intel_uncore_rmw(&dev_priv->uncore, WM2_LP_ILK, WM_LP_ENABLE, 0);
+       intel_uncore_rmw(&dev_priv->uncore, WM1_LP_ILK, WM_LP_ENABLE, 0);
 
        /*
         * Don't touch WM_LP_SPRITE_ENABLE here.
@@ -4109,9 +4101,7 @@ static void g4x_disable_trickle_feed(struct drm_i915_private *dev_priv)
        enum pipe pipe;
 
        for_each_pipe(dev_priv, pipe) {
-               intel_uncore_write(&dev_priv->uncore, DSPCNTR(pipe),
-                          intel_uncore_read(&dev_priv->uncore, DSPCNTR(pipe)) |
-                          DISP_TRICKLE_FEED_DISABLE);
+               intel_uncore_rmw(&dev_priv->uncore, DSPCNTR(pipe), 0, DISP_TRICKLE_FEED_DISABLE);
 
                intel_uncore_rmw(&dev_priv->uncore, DSPSURF(pipe), 0, 0);
                intel_uncore_posting_read(&dev_priv->uncore, DSPSURF(pipe));
@@ -4160,19 +4150,13 @@ static void ilk_init_clock_gating(struct drm_i915_private *dev_priv)
         */
        if (IS_IRONLAKE_M(dev_priv)) {
                /* WaFbcAsynchFlipDisableFbcQueue:ilk */
-               intel_uncore_write(&dev_priv->uncore, ILK_DISPLAY_CHICKEN1,
-                          intel_uncore_read(&dev_priv->uncore, ILK_DISPLAY_CHICKEN1) |
-                          ILK_FBCQ_DIS);
-               intel_uncore_write(&dev_priv->uncore, ILK_DISPLAY_CHICKEN2,
-                          intel_uncore_read(&dev_priv->uncore, ILK_DISPLAY_CHICKEN2) |
-                          ILK_DPARB_GATE);
+               intel_uncore_rmw(&dev_priv->uncore, ILK_DISPLAY_CHICKEN1, 0, ILK_FBCQ_DIS);
+               intel_uncore_rmw(&dev_priv->uncore, ILK_DISPLAY_CHICKEN2, 0, ILK_DPARB_GATE);
        }
 
        intel_uncore_write(&dev_priv->uncore, ILK_DSPCLK_GATE_D, dspclk_gate);
 
-       intel_uncore_write(&dev_priv->uncore, ILK_DISPLAY_CHICKEN2,
-                  intel_uncore_read(&dev_priv->uncore, ILK_DISPLAY_CHICKEN2) |
-                  ILK_ELPIN_409_SELECT);
+       intel_uncore_rmw(&dev_priv->uncore, ILK_DISPLAY_CHICKEN2, 0, ILK_ELPIN_409_SELECT);
 
        g4x_disable_trickle_feed(dev_priv);
 
@@ -4192,8 +4176,7 @@ static void cpt_init_clock_gating(struct drm_i915_private *dev_priv)
        intel_uncore_write(&dev_priv->uncore, SOUTH_DSPCLK_GATE_D, PCH_DPLSUNIT_CLOCK_GATE_DISABLE |
                   PCH_DPLUNIT_CLOCK_GATE_DISABLE |
                   PCH_CPUNIT_CLOCK_GATE_DISABLE);
-       intel_uncore_write(&dev_priv->uncore, SOUTH_CHICKEN2, intel_uncore_read(&dev_priv->uncore, SOUTH_CHICKEN2) |
-                  DPLS_EDP_PPS_FIX_DIS);
+       intel_uncore_rmw(&dev_priv->uncore, SOUTH_CHICKEN2, 0, DPLS_EDP_PPS_FIX_DIS);
        /* The below fixes the weird display corruption, a few pixels shifted
         * downward, on (only) LVDS of some HP laptops with IVY.
         */
@@ -4231,9 +4214,7 @@ static void gen6_init_clock_gating(struct drm_i915_private *dev_priv)
 
        intel_uncore_write(&dev_priv->uncore, ILK_DSPCLK_GATE_D, dspclk_gate);
 
-       intel_uncore_write(&dev_priv->uncore, ILK_DISPLAY_CHICKEN2,
-                  intel_uncore_read(&dev_priv->uncore, ILK_DISPLAY_CHICKEN2) |
-                  ILK_ELPIN_409_SELECT);
+       intel_uncore_rmw(&dev_priv->uncore, ILK_DISPLAY_CHICKEN2, 0, ILK_ELPIN_409_SELECT);
 
        intel_uncore_write(&dev_priv->uncore, GEN6_UCGCTL1,
                   intel_uncore_read(&dev_priv->uncore, GEN6_UCGCTL1) |
@@ -4293,14 +4274,12 @@ static void lpt_init_clock_gating(struct drm_i915_private *dev_priv)
         * disabled when not needed anymore in order to save power.
         */
        if (HAS_PCH_LPT_LP(dev_priv))
-               intel_uncore_write(&dev_priv->uncore, SOUTH_DSPCLK_GATE_D,
-                          intel_uncore_read(&dev_priv->uncore, SOUTH_DSPCLK_GATE_D) |
-                          PCH_LP_PARTITION_LEVEL_DISABLE);
+               intel_uncore_rmw(&dev_priv->uncore, SOUTH_DSPCLK_GATE_D,
+                                0, PCH_LP_PARTITION_LEVEL_DISABLE);
 
        /* WADPOClockGatingDisable:hsw */
-       intel_uncore_write(&dev_priv->uncore, TRANS_CHICKEN1(PIPE_A),
-                  intel_uncore_read(&dev_priv->uncore, TRANS_CHICKEN1(PIPE_A)) |
-                  TRANS_CHICKEN1_DP0UNIT_GC_DISABLE);
+       intel_uncore_rmw(&dev_priv->uncore, TRANS_CHICKEN1(PIPE_A),
+                        0, TRANS_CHICKEN1_DP0UNIT_GC_DISABLE);
 }
 
 static void lpt_suspend_hw(struct drm_i915_private *dev_priv)
@@ -4321,22 +4300,22 @@ static void gen8_set_l3sqc_credits(struct drm_i915_private *dev_priv,
        u32 val;
 
        /* WaTempDisableDOPClkGating:bdw */
-       misccpctl = intel_uncore_rmw(&dev_priv->uncore, GEN7_MISCCPCTL,
-                                    GEN7_DOP_CLOCK_GATE_ENABLE, 0);
+       misccpctl = intel_gt_mcr_multicast_rmw(to_gt(dev_priv), GEN8_MISCCPCTL,
+                                              GEN8_DOP_CLOCK_GATE_ENABLE, 0);
 
-       val = intel_uncore_read(&dev_priv->uncore, GEN8_L3SQCREG1);
+       val = intel_gt_mcr_read_any(to_gt(dev_priv), GEN8_L3SQCREG1);
        val &= ~L3_PRIO_CREDITS_MASK;
        val |= L3_GENERAL_PRIO_CREDITS(general_prio_credits);
        val |= L3_HIGH_PRIO_CREDITS(high_prio_credits);
-       intel_uncore_write(&dev_priv->uncore, GEN8_L3SQCREG1, val);
+       intel_gt_mcr_multicast_write(to_gt(dev_priv), GEN8_L3SQCREG1, val);
 
        /*
         * Wait at least 100 clocks before re-enabling clock gating.
         * See the definition of L3SQCREG1 in BSpec.
         */
-       intel_uncore_posting_read(&dev_priv->uncore, GEN8_L3SQCREG1);
+       intel_gt_mcr_read_any(to_gt(dev_priv), GEN8_L3SQCREG1);
        udelay(1);
-       intel_uncore_write(&dev_priv->uncore, GEN7_MISCCPCTL, misccpctl);
+       intel_gt_mcr_multicast_write(to_gt(dev_priv), GEN8_MISCCPCTL, misccpctl);
 }
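
gen8_set_l3sqc_credits() now goes through the GT MCR helpers because GEN8_MISCCPCTL and GEN8_L3SQCREG1 are replicated (multicast) registers on the affected platforms: a write has to be broadcast to every instance, while a read only needs to be steered to any one instance. A hedged sketch of that split (helper names from intel_gt_mcr.h, signatures assumed; the wrapper itself is illustrative):

/* Illustrative only: set bits in a multicast register across all instances. */
static void sketch_mcr_set_bits(struct intel_gt *gt, i915_mcr_reg_t reg, u32 bits)
{
        u32 val;

        val = intel_gt_mcr_read_any(gt, reg);               /* any instance will do */
        intel_gt_mcr_multicast_write(gt, reg, val | bits);  /* must reach them all */
}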
 
 static void icl_init_clock_gating(struct drm_i915_private *dev_priv)
@@ -4359,8 +4338,7 @@ static void gen12lp_init_clock_gating(struct drm_i915_private *dev_priv)
 
        /* Wa_1409825376:tgl (pre-prod)*/
        if (IS_TGL_DISPLAY_STEP(dev_priv, STEP_A0, STEP_C0))
-               intel_uncore_write(&dev_priv->uncore, GEN9_CLKGATE_DIS_3, intel_uncore_read(&dev_priv->uncore, GEN9_CLKGATE_DIS_3) |
-                          TGL_VRH_GATING_DIS);
+               intel_uncore_rmw(&dev_priv->uncore, GEN9_CLKGATE_DIS_3, 0, TGL_VRH_GATING_DIS);
 
        /* Wa_14013723622:tgl,rkl,dg1,adl-s */
        if (DISPLAY_VER(dev_priv) == 12)
@@ -4385,8 +4363,7 @@ static void dg1_init_clock_gating(struct drm_i915_private *dev_priv)
 
        /* Wa_1409836686:dg1[a0] */
        if (IS_DG1_GRAPHICS_STEP(dev_priv, STEP_A0, STEP_B0))
-               intel_uncore_write(&dev_priv->uncore, GEN9_CLKGATE_DIS_3, intel_uncore_read(&dev_priv->uncore, GEN9_CLKGATE_DIS_3) |
-                          DPT_GATING_DIS);
+               intel_uncore_rmw(&dev_priv->uncore, GEN9_CLKGATE_DIS_3, 0, DPT_GATING_DIS);
 }
 
 static void xehpsdv_init_clock_gating(struct drm_i915_private *dev_priv)
@@ -4428,8 +4405,7 @@ static void cnp_init_clock_gating(struct drm_i915_private *dev_priv)
                return;
 
        /* Display WA #1181 WaSouthDisplayDisablePWMCGEGating: cnp */
-       intel_uncore_write(&dev_priv->uncore, SOUTH_DSPCLK_GATE_D, intel_uncore_read(&dev_priv->uncore, SOUTH_DSPCLK_GATE_D) |
-                  CNP_PWM_CGE_GATING_DISABLE);
+       intel_uncore_rmw(&dev_priv->uncore, SOUTH_DSPCLK_GATE_D, 0, CNP_PWM_CGE_GATING_DISABLE);
 }
 
 static void cfl_init_clock_gating(struct drm_i915_private *dev_priv)
@@ -4438,23 +4414,20 @@ static void cfl_init_clock_gating(struct drm_i915_private *dev_priv)
        gen9_init_clock_gating(dev_priv);
 
        /* WAC6entrylatency:cfl */
-       intel_uncore_write(&dev_priv->uncore, FBC_LLC_READ_CTRL, intel_uncore_read(&dev_priv->uncore, FBC_LLC_READ_CTRL) |
-                  FBC_LLC_FULLY_OPEN);
+       intel_uncore_rmw(&dev_priv->uncore, FBC_LLC_READ_CTRL, 0, FBC_LLC_FULLY_OPEN);
 
        /*
         * WaFbcTurnOffFbcWatermark:cfl
         * Display WA #0562: cfl
         */
-       intel_uncore_write(&dev_priv->uncore, DISP_ARB_CTL, intel_uncore_read(&dev_priv->uncore, DISP_ARB_CTL) |
-                  DISP_FBC_WM_DIS);
+       intel_uncore_rmw(&dev_priv->uncore, DISP_ARB_CTL, 0, DISP_FBC_WM_DIS);
 
        /*
         * WaFbcNukeOnHostModify:cfl
         * Display WA #0873: cfl
         */
-       intel_uncore_write(&dev_priv->uncore, ILK_DPFC_CHICKEN(INTEL_FBC_A),
-                          intel_uncore_read(&dev_priv->uncore, ILK_DPFC_CHICKEN(INTEL_FBC_A)) |
-                          DPFC_NUKE_ON_ANY_MODIFICATION);
+       intel_uncore_rmw(&dev_priv->uncore, ILK_DPFC_CHICKEN(INTEL_FBC_A),
+                        0, DPFC_NUKE_ON_ANY_MODIFICATION);
 }
 
 static void kbl_init_clock_gating(struct drm_i915_private *dev_priv)
@@ -4462,33 +4435,30 @@ static void kbl_init_clock_gating(struct drm_i915_private *dev_priv)
        gen9_init_clock_gating(dev_priv);
 
        /* WAC6entrylatency:kbl */
-       intel_uncore_write(&dev_priv->uncore, FBC_LLC_READ_CTRL, intel_uncore_read(&dev_priv->uncore, FBC_LLC_READ_CTRL) |
-                  FBC_LLC_FULLY_OPEN);
+       intel_uncore_rmw(&dev_priv->uncore, FBC_LLC_READ_CTRL, 0, FBC_LLC_FULLY_OPEN);
 
        /* WaDisableSDEUnitClockGating:kbl */
        if (IS_KBL_GRAPHICS_STEP(dev_priv, 0, STEP_C0))
-               intel_uncore_write(&dev_priv->uncore, GEN8_UCGCTL6, intel_uncore_read(&dev_priv->uncore, GEN8_UCGCTL6) |
-                          GEN8_SDEUNIT_CLOCK_GATE_DISABLE);
+               intel_uncore_rmw(&dev_priv->uncore, GEN8_UCGCTL6,
+                                0, GEN8_SDEUNIT_CLOCK_GATE_DISABLE);
 
        /* WaDisableGamClockGating:kbl */
        if (IS_KBL_GRAPHICS_STEP(dev_priv, 0, STEP_C0))
-               intel_uncore_write(&dev_priv->uncore, GEN6_UCGCTL1, intel_uncore_read(&dev_priv->uncore, GEN6_UCGCTL1) |
-                          GEN6_GAMUNIT_CLOCK_GATE_DISABLE);
+               intel_uncore_rmw(&dev_priv->uncore, GEN6_UCGCTL1,
+                                0, GEN6_GAMUNIT_CLOCK_GATE_DISABLE);
 
        /*
         * WaFbcTurnOffFbcWatermark:kbl
         * Display WA #0562: kbl
         */
-       intel_uncore_write(&dev_priv->uncore, DISP_ARB_CTL, intel_uncore_read(&dev_priv->uncore, DISP_ARB_CTL) |
-                  DISP_FBC_WM_DIS);
+       intel_uncore_rmw(&dev_priv->uncore, DISP_ARB_CTL, 0, DISP_FBC_WM_DIS);
 
        /*
         * WaFbcNukeOnHostModify:kbl
         * Display WA #0873: kbl
         */
-       intel_uncore_write(&dev_priv->uncore, ILK_DPFC_CHICKEN(INTEL_FBC_A),
-                          intel_uncore_read(&dev_priv->uncore, ILK_DPFC_CHICKEN(INTEL_FBC_A)) |
-                          DPFC_NUKE_ON_ANY_MODIFICATION);
+       intel_uncore_rmw(&dev_priv->uncore, ILK_DPFC_CHICKEN(INTEL_FBC_A),
+                        0, DPFC_NUKE_ON_ANY_MODIFICATION);
 }
 
 static void skl_init_clock_gating(struct drm_i915_private *dev_priv)
@@ -4496,35 +4466,30 @@ static void skl_init_clock_gating(struct drm_i915_private *dev_priv)
        gen9_init_clock_gating(dev_priv);
 
        /* WaDisableDopClockGating:skl */
-       intel_uncore_write(&dev_priv->uncore, GEN7_MISCCPCTL, intel_uncore_read(&dev_priv->uncore, GEN7_MISCCPCTL) &
-                  ~GEN7_DOP_CLOCK_GATE_ENABLE);
+       intel_gt_mcr_multicast_rmw(to_gt(dev_priv), GEN8_MISCCPCTL,
+                                  GEN8_DOP_CLOCK_GATE_ENABLE, 0);
 
        /* WAC6entrylatency:skl */
-       intel_uncore_write(&dev_priv->uncore, FBC_LLC_READ_CTRL, intel_uncore_read(&dev_priv->uncore, FBC_LLC_READ_CTRL) |
-                  FBC_LLC_FULLY_OPEN);
+       intel_uncore_rmw(&dev_priv->uncore, FBC_LLC_READ_CTRL, 0, FBC_LLC_FULLY_OPEN);
 
        /*
         * WaFbcTurnOffFbcWatermark:skl
         * Display WA #0562: skl
         */
-       intel_uncore_write(&dev_priv->uncore, DISP_ARB_CTL, intel_uncore_read(&dev_priv->uncore, DISP_ARB_CTL) |
-                  DISP_FBC_WM_DIS);
+       intel_uncore_rmw(&dev_priv->uncore, DISP_ARB_CTL, 0, DISP_FBC_WM_DIS);
 
        /*
         * WaFbcNukeOnHostModify:skl
         * Display WA #0873: skl
         */
-       intel_uncore_write(&dev_priv->uncore, ILK_DPFC_CHICKEN(INTEL_FBC_A),
-                          intel_uncore_read(&dev_priv->uncore, ILK_DPFC_CHICKEN(INTEL_FBC_A)) |
-                          DPFC_NUKE_ON_ANY_MODIFICATION);
+       intel_uncore_rmw(&dev_priv->uncore, ILK_DPFC_CHICKEN(INTEL_FBC_A),
+                        0, DPFC_NUKE_ON_ANY_MODIFICATION);
 
        /*
         * WaFbcHighMemBwCorruptionAvoidance:skl
         * Display WA #0883: skl
         */
-       intel_uncore_write(&dev_priv->uncore, ILK_DPFC_CHICKEN(INTEL_FBC_A),
-                          intel_uncore_read(&dev_priv->uncore, ILK_DPFC_CHICKEN(INTEL_FBC_A)) |
-                          DPFC_DISABLE_DUMMY0);
+       intel_uncore_rmw(&dev_priv->uncore, ILK_DPFC_CHICKEN(INTEL_FBC_A), 0, DPFC_DISABLE_DUMMY0);
 }
 
 static void bdw_init_clock_gating(struct drm_i915_private *dev_priv)
@@ -4532,43 +4497,37 @@ static void bdw_init_clock_gating(struct drm_i915_private *dev_priv)
        enum pipe pipe;
 
        /* WaFbcAsynchFlipDisableFbcQueue:hsw,bdw */
-       intel_uncore_write(&dev_priv->uncore, CHICKEN_PIPESL_1(PIPE_A),
-                  intel_uncore_read(&dev_priv->uncore, CHICKEN_PIPESL_1(PIPE_A)) |
-                  HSW_FBCQ_DIS);
+       intel_uncore_rmw(&dev_priv->uncore, CHICKEN_PIPESL_1(PIPE_A), 0, HSW_FBCQ_DIS);
 
        /* WaSwitchSolVfFArbitrationPriority:bdw */
-       intel_uncore_write(&dev_priv->uncore, GAM_ECOCHK, intel_uncore_read(&dev_priv->uncore, GAM_ECOCHK) | HSW_ECOCHK_ARB_PRIO_SOL);
+       intel_uncore_rmw(&dev_priv->uncore, GAM_ECOCHK, 0, HSW_ECOCHK_ARB_PRIO_SOL);
 
        /* WaPsrDPAMaskVBlankInSRD:bdw */
-       intel_uncore_write(&dev_priv->uncore, CHICKEN_PAR1_1,
-                  intel_uncore_read(&dev_priv->uncore, CHICKEN_PAR1_1) | DPA_MASK_VBLANK_SRD);
+       intel_uncore_rmw(&dev_priv->uncore, CHICKEN_PAR1_1, 0, DPA_MASK_VBLANK_SRD);
 
        for_each_pipe(dev_priv, pipe) {
                /* WaPsrDPRSUnmaskVBlankInSRD:bdw */
-               intel_uncore_write(&dev_priv->uncore, CHICKEN_PIPESL_1(pipe),
-                          intel_uncore_read(&dev_priv->uncore, CHICKEN_PIPESL_1(pipe)) |
-                          BDW_DPRS_MASK_VBLANK_SRD);
+               intel_uncore_rmw(&dev_priv->uncore, CHICKEN_PIPESL_1(pipe),
+                                0, BDW_DPRS_MASK_VBLANK_SRD);
        }
 
        /* WaVSRefCountFullforceMissDisable:bdw */
        /* WaDSRefCountFullforceMissDisable:bdw */
-       intel_uncore_write(&dev_priv->uncore, GEN7_FF_THREAD_MODE,
-                  intel_uncore_read(&dev_priv->uncore, GEN7_FF_THREAD_MODE) &
-                  ~(GEN8_FF_DS_REF_CNT_FFME | GEN7_FF_VS_REF_CNT_FFME));
+       intel_uncore_rmw(&dev_priv->uncore, GEN7_FF_THREAD_MODE,
+                        GEN8_FF_DS_REF_CNT_FFME | GEN7_FF_VS_REF_CNT_FFME, 0);
 
        intel_uncore_write(&dev_priv->uncore, RING_PSMI_CTL(RENDER_RING_BASE),
                   _MASKED_BIT_ENABLE(GEN8_RC_SEMA_IDLE_MSG_DISABLE));
 
        /* WaDisableSDEUnitClockGating:bdw */
-       intel_uncore_write(&dev_priv->uncore, GEN8_UCGCTL6, intel_uncore_read(&dev_priv->uncore, GEN8_UCGCTL6) |
-                  GEN8_SDEUNIT_CLOCK_GATE_DISABLE);
+       intel_uncore_rmw(&dev_priv->uncore, GEN8_UCGCTL6, 0, GEN8_SDEUNIT_CLOCK_GATE_DISABLE);
 
        /* WaProgramL3SqcReg1Default:bdw */
        gen8_set_l3sqc_credits(dev_priv, 30, 2);
 
        /* WaKVMNotificationOnConfigChange:bdw */
-       intel_uncore_write(&dev_priv->uncore, CHICKEN_PAR2_1, intel_uncore_read(&dev_priv->uncore, CHICKEN_PAR2_1)
-                  | KVM_CONFIG_CHANGE_NOTIFICATION_SELECT);
+       intel_uncore_rmw(&dev_priv->uncore, CHICKEN_PAR2_1,
+                        0, KVM_CONFIG_CHANGE_NOTIFICATION_SELECT);
 
        lpt_init_clock_gating(dev_priv);
 
@@ -4577,24 +4536,20 @@ static void bdw_init_clock_gating(struct drm_i915_private *dev_priv)
         * Also see the CHICKEN2 write in bdw_init_workarounds() to disable DOP
         * clock gating.
         */
-       intel_uncore_write(&dev_priv->uncore, GEN6_UCGCTL1,
-                  intel_uncore_read(&dev_priv->uncore, GEN6_UCGCTL1) | GEN6_EU_TCUNIT_CLOCK_GATE_DISABLE);
+       intel_uncore_rmw(&dev_priv->uncore, GEN6_UCGCTL1, 0, GEN6_EU_TCUNIT_CLOCK_GATE_DISABLE);
 }
 
 static void hsw_init_clock_gating(struct drm_i915_private *dev_priv)
 {
        /* WaFbcAsynchFlipDisableFbcQueue:hsw,bdw */
-       intel_uncore_write(&dev_priv->uncore, CHICKEN_PIPESL_1(PIPE_A),
-                  intel_uncore_read(&dev_priv->uncore, CHICKEN_PIPESL_1(PIPE_A)) |
-                  HSW_FBCQ_DIS);
+       intel_uncore_rmw(&dev_priv->uncore, CHICKEN_PIPESL_1(PIPE_A), 0, HSW_FBCQ_DIS);
 
        /* This is required by WaCatErrorRejectionIssue:hsw */
-       intel_uncore_write(&dev_priv->uncore, GEN7_SQ_CHICKEN_MBCUNIT_CONFIG,
-                  intel_uncore_read(&dev_priv->uncore, GEN7_SQ_CHICKEN_MBCUNIT_CONFIG) |
-                  GEN7_SQ_CHICKEN_MBCUNIT_SQINTMOB);
+       intel_uncore_rmw(&dev_priv->uncore, GEN7_SQ_CHICKEN_MBCUNIT_CONFIG,
+                        0, GEN7_SQ_CHICKEN_MBCUNIT_SQINTMOB);
 
        /* WaSwitchSolVfFArbitrationPriority:hsw */
-       intel_uncore_write(&dev_priv->uncore, GAM_ECOCHK, intel_uncore_read(&dev_priv->uncore, GAM_ECOCHK) | HSW_ECOCHK_ARB_PRIO_SOL);
+       intel_uncore_rmw(&dev_priv->uncore, GAM_ECOCHK, 0, HSW_ECOCHK_ARB_PRIO_SOL);
 
        lpt_init_clock_gating(dev_priv);
 }
@@ -4604,9 +4559,7 @@ static void ivb_init_clock_gating(struct drm_i915_private *dev_priv)
        intel_uncore_write(&dev_priv->uncore, ILK_DSPCLK_GATE_D, ILK_VRHUNIT_CLOCK_GATE_DISABLE);
 
        /* WaFbcAsynchFlipDisableFbcQueue:ivb */
-       intel_uncore_write(&dev_priv->uncore, ILK_DISPLAY_CHICKEN1,
-                  intel_uncore_read(&dev_priv->uncore, ILK_DISPLAY_CHICKEN1) |
-                  ILK_FBCQ_DIS);
+       intel_uncore_rmw(&dev_priv->uncore, ILK_DISPLAY_CHICKEN1, 0, ILK_FBCQ_DIS);
 
        /* WaDisableBackToBackFlipFix:ivb */
        intel_uncore_write(&dev_priv->uncore, IVB_CHICKEN3,
@@ -4632,9 +4585,8 @@ static void ivb_init_clock_gating(struct drm_i915_private *dev_priv)
                   GEN6_RCZUNIT_CLOCK_GATE_DISABLE);
 
        /* This is required by WaCatErrorRejectionIssue:ivb */
-       intel_uncore_write(&dev_priv->uncore, GEN7_SQ_CHICKEN_MBCUNIT_CONFIG,
-                       intel_uncore_read(&dev_priv->uncore, GEN7_SQ_CHICKEN_MBCUNIT_CONFIG) |
-                       GEN7_SQ_CHICKEN_MBCUNIT_SQINTMOB);
+       intel_uncore_rmw(&dev_priv->uncore, GEN7_SQ_CHICKEN_MBCUNIT_CONFIG,
+                        0, GEN7_SQ_CHICKEN_MBCUNIT_SQINTMOB);
 
        g4x_disable_trickle_feed(dev_priv);
 
@@ -4659,9 +4611,8 @@ static void vlv_init_clock_gating(struct drm_i915_private *dev_priv)
                   _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE));
 
        /* This is required by WaCatErrorRejectionIssue:vlv */
-       intel_uncore_write(&dev_priv->uncore, GEN7_SQ_CHICKEN_MBCUNIT_CONFIG,
-                  intel_uncore_read(&dev_priv->uncore, GEN7_SQ_CHICKEN_MBCUNIT_CONFIG) |
-                  GEN7_SQ_CHICKEN_MBCUNIT_SQINTMOB);
+       intel_uncore_rmw(&dev_priv->uncore, GEN7_SQ_CHICKEN_MBCUNIT_CONFIG,
+                        0, GEN7_SQ_CHICKEN_MBCUNIT_SQINTMOB);
 
        /*
         * According to the spec, bit 13 (RCZUNIT) must be set on IVB.
@@ -4673,8 +4624,7 @@ static void vlv_init_clock_gating(struct drm_i915_private *dev_priv)
        /* WaDisableL3Bank2xClockGate:vlv
         * Disabling L3 clock gating - MMIO 940c[25] = 1
         * Set bit 25, to disable L3_BANK_2x_CLK_GATING */
-       intel_uncore_write(&dev_priv->uncore, GEN7_UCGCTL4,
-                  intel_uncore_read(&dev_priv->uncore, GEN7_UCGCTL4) | GEN7_L3BANK2X_CLOCK_GATE_DISABLE);
+       intel_uncore_rmw(&dev_priv->uncore, GEN7_UCGCTL4, 0, GEN7_L3BANK2X_CLOCK_GATE_DISABLE);
 
        /*
         * WaDisableVLVClockGating_VBIIssue:vlv
@@ -4688,21 +4638,18 @@ static void chv_init_clock_gating(struct drm_i915_private *dev_priv)
 {
        /* WaVSRefCountFullforceMissDisable:chv */
        /* WaDSRefCountFullforceMissDisable:chv */
-       intel_uncore_write(&dev_priv->uncore, GEN7_FF_THREAD_MODE,
-                  intel_uncore_read(&dev_priv->uncore, GEN7_FF_THREAD_MODE) &
-                  ~(GEN8_FF_DS_REF_CNT_FFME | GEN7_FF_VS_REF_CNT_FFME));
+       intel_uncore_rmw(&dev_priv->uncore, GEN7_FF_THREAD_MODE,
+                        GEN8_FF_DS_REF_CNT_FFME | GEN7_FF_VS_REF_CNT_FFME, 0);
 
        /* WaDisableSemaphoreAndSyncFlipWait:chv */
        intel_uncore_write(&dev_priv->uncore, RING_PSMI_CTL(RENDER_RING_BASE),
                   _MASKED_BIT_ENABLE(GEN8_RC_SEMA_IDLE_MSG_DISABLE));
 
        /* WaDisableCSUnitClockGating:chv */
-       intel_uncore_write(&dev_priv->uncore, GEN6_UCGCTL1, intel_uncore_read(&dev_priv->uncore, GEN6_UCGCTL1) |
-                  GEN6_CSUNIT_CLOCK_GATE_DISABLE);
+       intel_uncore_rmw(&dev_priv->uncore, GEN6_UCGCTL1, 0, GEN6_CSUNIT_CLOCK_GATE_DISABLE);
 
        /* WaDisableSDEUnitClockGating:chv */
-       intel_uncore_write(&dev_priv->uncore, GEN8_UCGCTL6, intel_uncore_read(&dev_priv->uncore, GEN8_UCGCTL6) |
-                  GEN8_SDEUNIT_CLOCK_GATE_DISABLE);
+       intel_uncore_rmw(&dev_priv->uncore, GEN8_UCGCTL6, 0, GEN8_SDEUNIT_CLOCK_GATE_DISABLE);
 
        /*
         * WaProgramL3SqcReg1Default:chv
index 744cca507946be24b93a566adeaa16182a95c6da..129746713d072f06e7ad1a3e4ad3b8e8aaeb8898 100644 (file)
@@ -633,6 +633,8 @@ void intel_runtime_pm_driver_release(struct intel_runtime_pm *rpm)
                                                     runtime_pm);
        int count = atomic_read(&rpm->wakeref_count);
 
+       intel_wakeref_auto_fini(&rpm->userfault_wakeref);
+
        drm_WARN(&i915->drm, count,
                 "i915 raw-wakerefs=%d wakelocks=%d on cleanup\n",
                 intel_rpm_raw_wakeref_count(count),
@@ -652,4 +654,7 @@ void intel_runtime_pm_init_early(struct intel_runtime_pm *rpm)
        rpm->available = HAS_RUNTIME_PM(i915);
 
        init_intel_runtime_pm_wakeref(rpm);
+       INIT_LIST_HEAD(&rpm->lmem_userfault_list);
+       spin_lock_init(&rpm->lmem_userfault_lock);
+       intel_wakeref_auto_init(&rpm->userfault_wakeref, rpm);
 }
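
The new fields follow the locking rules spelled out in the intel_runtime_pm.h hunk below; a hedged sketch of an access from outside the runtime-suspend path (the function and the list_head parameter are illustrative, not part of this series):

/*
 * Illustrative only: hold a runtime pm wakeref first so runtime suspend
 * cannot run concurrently, then take lmem_userfault_lock for exclusive
 * access to the list itself.
 */
static void sketch_track_userfault(struct intel_runtime_pm *rpm,
                                   struct list_head *obj_link)
{
        intel_wakeref_t wakeref;

        wakeref = intel_runtime_pm_get(rpm);

        spin_lock(&rpm->lmem_userfault_lock);
        list_add(obj_link, &rpm->lmem_userfault_list);
        spin_unlock(&rpm->lmem_userfault_lock);

        intel_runtime_pm_put(rpm, wakeref);
}

The runtime-suspend side can then walk the list without taking extra locks, because every other accessor must be holding a wakeref and runtime suspend cannot run while wakerefs are held.
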
index d9160e3ff4afcf99adb8da12c58f0b1779da252b..98b8b28baaa15ef21ee6a804b66cc801868a309a 100644 (file)
@@ -53,6 +53,28 @@ struct intel_runtime_pm {
        bool irqs_enabled;
        bool no_wakeref_tracking;
 
+       /*
+        * Protects access to the lmem userfault list.
+        * Outside of the runtime suspend path, access to @lmem_userfault_list
+        * always requires first grabbing the runtime pm wakeref, to ensure we
+        * can't race against runtime suspend.
+        * Once we have that we also need to grab @lmem_userfault_lock, at
+        * which point we have exclusive access.
+        * The runtime suspend path is special since it doesn't really hold
+        * any locks, but instead has exclusive access by virtue of all other
+        * accesses requiring holding the runtime pm wakeref.
+        */
+       spinlock_t lmem_userfault_lock;
+
+       /*
+        * List of userfaulted GEM objects which need to have their mmap
+        * mappings released on the runtime suspend path.
+        */
+       struct list_head lmem_userfault_list;
+
+       /* Manual runtime pm autosuspend delay for user GGTT/lmem mmaps */
+       struct intel_wakeref_auto userfault_wakeref;
+
 #if IS_ENABLED(CONFIG_DRM_I915_DEBUG_RUNTIME_PM)
        /*
         * To aid detection of wakeref leaks and general misuse, we
index 5cd423c7b64649f4e19f26d9e78780e70ce85a12..2a3e2869fe710042eeb646463cc2cf6ae5820bca 100644 (file)
@@ -104,6 +104,7 @@ static const char * const forcewake_domain_names[] = {
        "vebox1",
        "vebox2",
        "vebox3",
+       "gsc",
 };
 
 const char *
@@ -888,10 +889,13 @@ void assert_forcewakes_active(struct intel_uncore *uncore,
        spin_unlock_irq(&uncore->lock);
 }
 
-/* We give fast paths for the really cool registers */
+/*
+ * We give fast paths for the really cool registers.  The second range includes
+ * media domains (and the GSC starting from Xe_LPM+)
+ */
 #define NEEDS_FORCE_WAKE(reg) ({ \
        u32 __reg = (reg); \
-       __reg < 0x40000 || __reg >= GEN11_BSD_RING_BASE; \
+       __reg < 0x40000 || __reg >= 0x116000; \
 })
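
In practice the updated fast-path check means (offsets below are illustrative examples, not taken from this diff):

/* Illustrative only: which offsets take the forcewake fast path now. */
static bool sketch_needs_force_wake(u32 reg)
{
        return reg < 0x40000 ||     /* GT/render/engine registers */
               reg >= 0x116000;     /* media engines and, on Xe_LPM+, the GSC */
}

/*
 * e.g. 0x02030  (render command streamer)   -> forcewake path
 *      0x70000  (display, inside the hole)  -> no forcewake needed
 *      0x116000 (GSC)                       -> forcewake path
 *      0x1c0000 (VD0)                       -> forcewake path
 */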
 
 static int fw_range_cmp(u32 offset, const struct intel_forcewake_range *entry)
@@ -1131,6 +1135,45 @@ static const struct i915_range pvc_shadowed_regs[] = {
        { .start = 0x1F8510, .end = 0x1F8550 },
 };
 
+static const struct i915_range mtl_shadowed_regs[] = {
+       { .start =   0x2030, .end =   0x2030 },
+       { .start =   0x2510, .end =   0x2550 },
+       { .start =   0xA008, .end =   0xA00C },
+       { .start =   0xA188, .end =   0xA188 },
+       { .start =   0xA278, .end =   0xA278 },
+       { .start =   0xA540, .end =   0xA56C },
+       { .start =   0xC050, .end =   0xC050 },
+       { .start =   0xC340, .end =   0xC340 },
+       { .start =   0xC4C8, .end =   0xC4C8 },
+       { .start =   0xC4E0, .end =   0xC4E0 },
+       { .start =   0xC600, .end =   0xC600 },
+       { .start =   0xC658, .end =   0xC658 },
+       { .start =   0xCFD4, .end =   0xCFDC },
+       { .start =  0x22030, .end =  0x22030 },
+       { .start =  0x22510, .end =  0x22550 },
+};
+
+static const struct i915_range xelpmp_shadowed_regs[] = {
+       { .start = 0x1C0030, .end = 0x1C0030 },
+       { .start = 0x1C0510, .end = 0x1C0550 },
+       { .start = 0x1C8030, .end = 0x1C8030 },
+       { .start = 0x1C8510, .end = 0x1C8550 },
+       { .start = 0x1D0030, .end = 0x1D0030 },
+       { .start = 0x1D0510, .end = 0x1D0550 },
+       { .start = 0x38A008, .end = 0x38A00C },
+       { .start = 0x38A188, .end = 0x38A188 },
+       { .start = 0x38A278, .end = 0x38A278 },
+       { .start = 0x38A540, .end = 0x38A56C },
+       { .start = 0x38A618, .end = 0x38A618 },
+       { .start = 0x38C050, .end = 0x38C050 },
+       { .start = 0x38C340, .end = 0x38C340 },
+       { .start = 0x38C4C8, .end = 0x38C4C8 },
+       { .start = 0x38C4E0, .end = 0x38C4E4 },
+       { .start = 0x38C600, .end = 0x38C600 },
+       { .start = 0x38C658, .end = 0x38C658 },
+       { .start = 0x38CFD4, .end = 0x38CFDC },
+};
+
 static int mmio_range_cmp(u32 key, const struct i915_range *range)
 {
        if (key < range->start)
@@ -1639,25 +1682,27 @@ static const struct intel_forcewake_range __pvc_fw_ranges[] = {
        GEN_FW_RANGE(0x12000, 0x12fff, 0), /*
                0x12000 - 0x127ff: always on
                0x12800 - 0x12fff: reserved */
-       GEN_FW_RANGE(0x13000, 0x23fff, FORCEWAKE_GT), /*
+       GEN_FW_RANGE(0x13000, 0x19fff, FORCEWAKE_GT), /*
                0x13000 - 0x135ff: gt
                0x13600 - 0x147ff: reserved
                0x14800 - 0x153ff: gt
-               0x15400 - 0x19fff: reserved
-               0x1a000 - 0x1ffff: gt
-               0x20000 - 0x21fff: reserved
-               0x22000 - 0x23fff: gt */
+               0x15400 - 0x19fff: reserved */
+       GEN_FW_RANGE(0x1a000, 0x21fff, FORCEWAKE_RENDER), /*
+               0x1a000 - 0x1ffff: render
+               0x20000 - 0x21fff: reserved */
+       GEN_FW_RANGE(0x22000, 0x23fff, FORCEWAKE_GT),
        GEN_FW_RANGE(0x24000, 0x2417f, 0), /*
                0x24000 - 0x2407f: always on
                0x24080 - 0x2417f: reserved */
-       GEN_FW_RANGE(0x24180, 0x3ffff, FORCEWAKE_GT), /*
+       GEN_FW_RANGE(0x24180, 0x25fff, FORCEWAKE_GT), /*
                0x24180 - 0x241ff: gt
                0x24200 - 0x251ff: reserved
                0x25200 - 0x252ff: gt
-               0x25300 - 0x25fff: reserved
-               0x26000 - 0x27fff: gt
-               0x28000 - 0x2ffff: reserved
-               0x30000 - 0x3ffff: gt */
+               0x25300 - 0x25fff: reserved */
+       GEN_FW_RANGE(0x26000, 0x2ffff, FORCEWAKE_RENDER), /*
+               0x26000 - 0x27fff: render
+               0x28000 - 0x2ffff: reserved */
+       GEN_FW_RANGE(0x30000, 0x3ffff, FORCEWAKE_GT),
        GEN_FW_RANGE(0x40000, 0x1bffff, 0),
        GEN_FW_RANGE(0x1c0000, 0x1c3fff, FORCEWAKE_MEDIA_VDBOX0), /*
                0x1c0000 - 0x1c2bff: VD0
@@ -1679,6 +1724,162 @@ static const struct intel_forcewake_range __pvc_fw_ranges[] = {
        GEN_FW_RANGE(0x3e0000, 0x3effff, FORCEWAKE_GT),
 };
 
+static const struct intel_forcewake_range __mtl_fw_ranges[] = {
+       GEN_FW_RANGE(0x0, 0xaff, 0),
+       GEN_FW_RANGE(0xb00, 0xbff, FORCEWAKE_GT),
+       GEN_FW_RANGE(0xc00, 0xfff, 0),
+       GEN_FW_RANGE(0x1000, 0x1fff, FORCEWAKE_GT),
+       GEN_FW_RANGE(0x2000, 0x26ff, FORCEWAKE_RENDER),
+       GEN_FW_RANGE(0x2700, 0x2fff, FORCEWAKE_GT),
+       GEN_FW_RANGE(0x3000, 0x3fff, FORCEWAKE_RENDER),
+       GEN_FW_RANGE(0x4000, 0x51ff, FORCEWAKE_GT), /*
+               0x4000 - 0x48ff: render
+               0x4900 - 0x51ff: reserved */
+       GEN_FW_RANGE(0x5200, 0x7fff, FORCEWAKE_RENDER), /*
+               0x5200 - 0x53ff: render
+               0x5400 - 0x54ff: reserved
+               0x5500 - 0x7fff: render */
+       GEN_FW_RANGE(0x8000, 0x813f, FORCEWAKE_GT),
+       GEN_FW_RANGE(0x8140, 0x817f, FORCEWAKE_RENDER), /*
+               0x8140 - 0x815f: render
+               0x8160 - 0x817f: reserved */
+       GEN_FW_RANGE(0x8180, 0x81ff, 0),
+       GEN_FW_RANGE(0x8200, 0x94cf, FORCEWAKE_GT), /*
+               0x8200 - 0x87ff: gt
+               0x8800 - 0x8dff: reserved
+               0x8e00 - 0x8f7f: gt
+               0x8f80 - 0x8fff: reserved
+               0x9000 - 0x947f: gt
+               0x9480 - 0x94cf: reserved */
+       GEN_FW_RANGE(0x94d0, 0x955f, FORCEWAKE_RENDER),
+       GEN_FW_RANGE(0x9560, 0x967f, 0), /*
+               0x9560 - 0x95ff: always on
+               0x9600 - 0x967f: reserved */
+       GEN_FW_RANGE(0x9680, 0x97ff, FORCEWAKE_RENDER), /*
+               0x9680 - 0x96ff: render
+               0x9700 - 0x97ff: reserved */
+       GEN_FW_RANGE(0x9800, 0xcfff, FORCEWAKE_GT), /*
+               0x9800 - 0xb4ff: gt
+               0xb500 - 0xbfff: reserved
+               0xc000 - 0xcfff: gt */
+       GEN_FW_RANGE(0xd000, 0xd7ff, 0), /*
+               0xd000 - 0xd3ff: always on
+               0xd400 - 0xd7ff: reserved */
+       GEN_FW_RANGE(0xd800, 0xd87f, FORCEWAKE_RENDER),
+       GEN_FW_RANGE(0xd880, 0xdbff, FORCEWAKE_GT),
+       GEN_FW_RANGE(0xdc00, 0xdcff, FORCEWAKE_RENDER),
+       GEN_FW_RANGE(0xdd00, 0xde7f, FORCEWAKE_GT), /*
+               0xdd00 - 0xddff: gt
+               0xde00 - 0xde7f: reserved */
+       GEN_FW_RANGE(0xde80, 0xe8ff, FORCEWAKE_RENDER), /*
+               0xde80 - 0xdfff: render
+               0xe000 - 0xe0ff: reserved
+               0xe100 - 0xe8ff: render */
+       GEN_FW_RANGE(0xe900, 0xe9ff, FORCEWAKE_GT),
+       GEN_FW_RANGE(0xea00, 0x147ff, 0), /*
+                0xea00 - 0x11fff: reserved
+               0x12000 - 0x127ff: always on
+               0x12800 - 0x147ff: reserved */
+       GEN_FW_RANGE(0x14800, 0x19fff, FORCEWAKE_GT), /*
+               0x14800 - 0x153ff: gt
+               0x15400 - 0x19fff: reserved */
+       GEN_FW_RANGE(0x1a000, 0x21fff, FORCEWAKE_RENDER), /*
+               0x1a000 - 0x1bfff: render
+               0x1c000 - 0x21fff: reserved */
+       GEN_FW_RANGE(0x22000, 0x23fff, FORCEWAKE_GT),
+       GEN_FW_RANGE(0x24000, 0x2ffff, 0), /*
+               0x24000 - 0x2407f: always on
+               0x24080 - 0x2ffff: reserved */
+       GEN_FW_RANGE(0x30000, 0x3ffff, FORCEWAKE_GT)
+};
+
+/*
+ * Note that the register ranges here are the final offsets after
+ * translation of the GSI block to the 0x380000 offset.
+ *
+ * NOTE:  There are a couple MCR ranges near the bottom of this table
+ * that need to power up either VD0 or VD2 depending on which replicated
+ * instance of the register we're trying to access.  Our forcewake logic
+ * at the moment doesn't have a good way to take steering into consideration,
+ * and the driver doesn't even access any registers in those ranges today,
+ * so for now we just mark those ranges as FORCEWAKE_ALL.  That will ensure
+ * proper operation if we do start using the ranges in the future, and we
+ * can determine at that time whether it's worth adding extra complexity to
+ * the forcewake handling to take steering into consideration.
+ */
+static const struct intel_forcewake_range __xelpmp_fw_ranges[] = {
+       GEN_FW_RANGE(0x0, 0x115fff, 0), /* render GT range */
+       GEN_FW_RANGE(0x116000, 0x11ffff, FORCEWAKE_GSC), /*
+               0x116000 - 0x117fff: gsc
+               0x118000 - 0x119fff: reserved
+               0x11a000 - 0x11efff: gsc
+               0x11f000 - 0x11ffff: reserved */
+       GEN_FW_RANGE(0x120000, 0x1bffff, 0), /* non-GT range */
+       GEN_FW_RANGE(0x1c0000, 0x1c7fff, FORCEWAKE_MEDIA_VDBOX0), /*
+               0x1c0000 - 0x1c3dff: VD0
+               0x1c3e00 - 0x1c3eff: reserved
+               0x1c3f00 - 0x1c3fff: VD0
+               0x1c4000 - 0x1c7fff: reserved */
+       GEN_FW_RANGE(0x1c8000, 0x1cbfff, FORCEWAKE_MEDIA_VEBOX0), /*
+               0x1c8000 - 0x1ca0ff: VE0
+               0x1ca100 - 0x1cbfff: reserved */
+       GEN_FW_RANGE(0x1cc000, 0x1cffff, FORCEWAKE_MEDIA_VDBOX0), /*
+               0x1cc000 - 0x1cdfff: VD0
+               0x1ce000 - 0x1cffff: reserved */
+       GEN_FW_RANGE(0x1d0000, 0x1d7fff, FORCEWAKE_MEDIA_VDBOX2), /*
+               0x1d0000 - 0x1d3dff: VD2
+               0x1d3e00 - 0x1d3eff: reserved
+               0x1d4000 - 0x1d7fff: VD2 */
+       GEN_FW_RANGE(0x1d8000, 0x1da0ff, FORCEWAKE_MEDIA_VEBOX1),
+       GEN_FW_RANGE(0x1da100, 0x380aff, 0), /*
+               0x1da100 - 0x23ffff: reserved
+               0x240000 - 0x37ffff: non-GT range
+               0x380000 - 0x380aff: reserved */
+       GEN_FW_RANGE(0x380b00, 0x380bff, FORCEWAKE_GT),
+       GEN_FW_RANGE(0x380c00, 0x380fff, 0),
+       GEN_FW_RANGE(0x381000, 0x38817f, FORCEWAKE_GT), /*
+               0x381000 - 0x381fff: gt
+               0x382000 - 0x383fff: reserved
+               0x384000 - 0x384aff: gt
+               0x384b00 - 0x3851ff: reserved
+               0x385200 - 0x3871ff: gt
+               0x387200 - 0x387fff: reserved
+               0x388000 - 0x38813f: gt
+               0x388140 - 0x38817f: reserved */
+       GEN_FW_RANGE(0x388180, 0x3882ff, 0), /*
+               0x388180 - 0x3881ff: always on
+               0x388200 - 0x3882ff: reserved */
+       GEN_FW_RANGE(0x388300, 0x38955f, FORCEWAKE_GT), /*
+               0x388300 - 0x38887f: gt
+               0x388880 - 0x388fff: reserved
+               0x389000 - 0x38947f: gt
+               0x389480 - 0x38955f: reserved */
+       GEN_FW_RANGE(0x389560, 0x389fff, 0), /*
+               0x389560 - 0x3895ff: always on
+               0x389600 - 0x389fff: reserved */
+       GEN_FW_RANGE(0x38a000, 0x38cfff, FORCEWAKE_GT), /*
+               0x38a000 - 0x38afff: gt
+               0x38b000 - 0x38bfff: reserved
+               0x38c000 - 0x38cfff: gt */
+       GEN_FW_RANGE(0x38d000, 0x38d11f, 0),
+       GEN_FW_RANGE(0x38d120, 0x391fff, FORCEWAKE_GT), /*
+               0x38d120 - 0x38dfff: gt
+               0x38e000 - 0x38efff: reserved
+               0x38f000 - 0x38ffff: gt
+               0x390000 - 0x391fff: reserved */
+       GEN_FW_RANGE(0x392000, 0x392fff, 0), /*
+               0x392000 - 0x3927ff: always on
+               0x392800 - 0x392fff: reserved */
+       GEN_FW_RANGE(0x393000, 0x3931ff, FORCEWAKE_GT),
+       GEN_FW_RANGE(0x393200, 0x39323f, FORCEWAKE_ALL), /* instance-based, see note above */
+       GEN_FW_RANGE(0x393240, 0x3933ff, FORCEWAKE_GT),
+       GEN_FW_RANGE(0x393400, 0x3934ff, FORCEWAKE_ALL), /* instance-based, see note above */
+       GEN_FW_RANGE(0x393500, 0x393c7f, 0), /*
+               0x393500 - 0x393bff: reserved
+               0x393c00 - 0x393c7f: always on */
+       GEN_FW_RANGE(0x393c80, 0x393dff, FORCEWAKE_GT),
+};
+
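The new __mtl_fw_ranges and __xelpmp_fw_ranges tables are consumed the same way as the existing per-platform tables: the uncore code looks up the touched offset and wakes the domains listed for the matching range. A hedged sketch of that lookup (the real code binary-searches via fw_range_cmp() above; this linear form is only illustrative):

/* Illustrative only: find which forcewake domains cover @offset. */
static enum forcewake_domains
sketch_lookup_fw_domains(const struct intel_forcewake_range *ranges,
                         unsigned int count, u32 offset)
{
        unsigned int i;

        for (i = 0; i < count; i++) {
                if (offset >= ranges[i].start && offset <= ranges[i].end)
                        return ranges[i].domains;
        }

        return 0; /* e.g. an "always on" range: no forcewake needed */
}
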
 static void
 ilk_dummy_write(struct intel_uncore *uncore)
 {
@@ -2021,6 +2222,7 @@ static int __fw_domain_init(struct intel_uncore *uncore,
        BUILD_BUG_ON(FORCEWAKE_MEDIA_VEBOX1 != (1 << FW_DOMAIN_ID_MEDIA_VEBOX1));
        BUILD_BUG_ON(FORCEWAKE_MEDIA_VEBOX2 != (1 << FW_DOMAIN_ID_MEDIA_VEBOX2));
        BUILD_BUG_ON(FORCEWAKE_MEDIA_VEBOX3 != (1 << FW_DOMAIN_ID_MEDIA_VEBOX3));
+       BUILD_BUG_ON(FORCEWAKE_GSC != (1 << FW_DOMAIN_ID_GSC));
 
        d->mask = BIT(domain_id);
 
@@ -2085,17 +2287,26 @@ static int intel_uncore_fw_domains_init(struct intel_uncore *uncore)
        (ret ?: (ret = __fw_domain_init((uncore__), (id__), (set__), (ack__))))
 
        if (GRAPHICS_VER(i915) >= 11) {
-               /* we'll prune the domains of missing engines later */
-               intel_engine_mask_t emask = RUNTIME_INFO(i915)->platform_engine_mask;
+               intel_engine_mask_t emask;
                int i;
 
+               /* we'll prune the domains of missing engines later */
+               emask = uncore->gt->info.engine_mask;
+
                uncore->fw_get_funcs = &uncore_get_fallback;
-               fw_domain_init(uncore, FW_DOMAIN_ID_RENDER,
-                              FORCEWAKE_RENDER_GEN9,
-                              FORCEWAKE_ACK_RENDER_GEN9);
-               fw_domain_init(uncore, FW_DOMAIN_ID_GT,
-                              FORCEWAKE_GT_GEN9,
-                              FORCEWAKE_ACK_GT_GEN9);
+               if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70))
+                       fw_domain_init(uncore, FW_DOMAIN_ID_GT,
+                                      FORCEWAKE_GT_GEN9,
+                                      FORCEWAKE_ACK_GT_MTL);
+               else
+                       fw_domain_init(uncore, FW_DOMAIN_ID_GT,
+                                      FORCEWAKE_GT_GEN9,
+                                      FORCEWAKE_ACK_GT_GEN9);
+
+               if (RCS_MASK(uncore->gt) || CCS_MASK(uncore->gt))
+                       fw_domain_init(uncore, FW_DOMAIN_ID_RENDER,
+                                      FORCEWAKE_RENDER_GEN9,
+                                      FORCEWAKE_ACK_RENDER_GEN9);
 
                for (i = 0; i < I915_MAX_VCS; i++) {
                        if (!__HAS_ENGINE(emask, _VCS(i)))
@@ -2113,6 +2324,10 @@ static int intel_uncore_fw_domains_init(struct intel_uncore *uncore)
                                       FORCEWAKE_MEDIA_VEBOX_GEN11(i),
                                       FORCEWAKE_ACK_MEDIA_VEBOX_GEN11(i));
                }
+
+               if (uncore->gt->type == GT_MEDIA)
+                       fw_domain_init(uncore, FW_DOMAIN_ID_GSC,
+                                      FORCEWAKE_REQ_GSC, FORCEWAKE_ACK_GSC);
        } else if (IS_GRAPHICS_VER(i915, 9, 10)) {
                uncore->fw_get_funcs = &uncore_get_fallback;
                fw_domain_init(uncore, FW_DOMAIN_ID_RENDER,
@@ -2300,6 +2515,22 @@ static void uncore_raw_init(struct intel_uncore *uncore)
        }
 }
 
+static int uncore_media_forcewake_init(struct intel_uncore *uncore)
+{
+       struct drm_i915_private *i915 = uncore->i915;
+
+       if (MEDIA_VER(i915) >= 13) {
+               ASSIGN_FW_DOMAINS_TABLE(uncore, __xelpmp_fw_ranges);
+               ASSIGN_SHADOW_TABLE(uncore, xelpmp_shadowed_regs);
+               ASSIGN_WRITE_MMIO_VFUNCS(uncore, fwtable);
+       } else {
+               MISSING_CASE(MEDIA_VER(i915));
+               return -ENODEV;
+       }
+
+       return 0;
+}
+
 static int uncore_forcewake_init(struct intel_uncore *uncore)
 {
        struct drm_i915_private *i915 = uncore->i915;
@@ -2314,7 +2545,14 @@ static int uncore_forcewake_init(struct intel_uncore *uncore)
 
        ASSIGN_READ_MMIO_VFUNCS(uncore, fwtable);
 
-       if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 60)) {
+       if (uncore->gt->type == GT_MEDIA)
+               return uncore_media_forcewake_init(uncore);
+
+       if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70)) {
+               ASSIGN_FW_DOMAINS_TABLE(uncore, __mtl_fw_ranges);
+               ASSIGN_SHADOW_TABLE(uncore, mtl_shadowed_regs);
+               ASSIGN_WRITE_MMIO_VFUNCS(uncore, fwtable);
+       } else if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 60)) {
                ASSIGN_FW_DOMAINS_TABLE(uncore, __pvc_fw_ranges);
                ASSIGN_SHADOW_TABLE(uncore, pvc_shadowed_regs);
                ASSIGN_WRITE_MMIO_VFUNCS(uncore, fwtable);
index ddafa4a7ed718e336598301a39b6b1f1fa7e0009..5449146a06247cb23febea8b7536c125fc5001af 100644 (file)
@@ -62,6 +62,7 @@ enum forcewake_domain_id {
        FW_DOMAIN_ID_MEDIA_VEBOX1,
        FW_DOMAIN_ID_MEDIA_VEBOX2,
        FW_DOMAIN_ID_MEDIA_VEBOX3,
+       FW_DOMAIN_ID_GSC,
 
        FW_DOMAIN_ID_COUNT
 };
@@ -82,6 +83,7 @@ enum forcewake_domains {
        FORCEWAKE_MEDIA_VEBOX1  = BIT(FW_DOMAIN_ID_MEDIA_VEBOX1),
        FORCEWAKE_MEDIA_VEBOX2  = BIT(FW_DOMAIN_ID_MEDIA_VEBOX2),
        FORCEWAKE_MEDIA_VEBOX3  = BIT(FW_DOMAIN_ID_MEDIA_VEBOX3),
+       FORCEWAKE_GSC           = BIT(FW_DOMAIN_ID_GSC),
 
        FORCEWAKE_ALL = BIT(FW_DOMAIN_ID_COUNT) - 1,
 };
index 69cdaaddc4a9067192e8550a1f4d0c850da7e4e7..5efe61f67546011646df1e0d05e0889ceff8bded 100644 (file)
@@ -103,19 +103,15 @@ static int create_vcs_context(struct intel_pxp *pxp)
 
 static void destroy_vcs_context(struct intel_pxp *pxp)
 {
-       intel_engine_destroy_pinned_context(fetch_and_zero(&pxp->ce));
+       if (pxp->ce)
+               intel_engine_destroy_pinned_context(fetch_and_zero(&pxp->ce));
 }
 
-void intel_pxp_init(struct intel_pxp *pxp)
+static void pxp_init_full(struct intel_pxp *pxp)
 {
        struct intel_gt *gt = pxp_to_gt(pxp);
        int ret;
 
-       if (!HAS_PXP(gt->i915))
-               return;
-
-       mutex_init(&pxp->tee_mutex);
-
        /*
         * we'll use the completion to check if there is a termination pending,
         * so we start it as completed and we reinit it when a termination
@@ -124,8 +120,7 @@ void intel_pxp_init(struct intel_pxp *pxp)
        init_completion(&pxp->termination);
        complete_all(&pxp->termination);
 
-       mutex_init(&pxp->arb_mutex);
-       INIT_WORK(&pxp->session_work, intel_pxp_session_work);
+       intel_pxp_session_management_init(pxp);
 
        ret = create_vcs_context(pxp);
        if (ret)
@@ -143,11 +138,26 @@ out_context:
        destroy_vcs_context(pxp);
 }
 
-void intel_pxp_fini(struct intel_pxp *pxp)
+void intel_pxp_init(struct intel_pxp *pxp)
 {
-       if (!intel_pxp_is_enabled(pxp))
+       struct intel_gt *gt = pxp_to_gt(pxp);
+
+       /* we rely on the mei PXP module */
+       if (!IS_ENABLED(CONFIG_INTEL_MEI_PXP))
                return;
 
+       /*
+        * If HuC is loaded by GSC but PXP is disabled, we can skip the init of
+        * the full PXP session/object management and just init the tee channel.
+        */
+       if (HAS_PXP(gt->i915))
+               pxp_init_full(pxp);
+       else if (intel_huc_is_loaded_by_gsc(&gt->uc.huc) && intel_uc_uses_huc(&gt->uc))
+               intel_pxp_tee_component_init(pxp);
+}
+
+void intel_pxp_fini(struct intel_pxp *pxp)
+{
        pxp->arb_is_valid = false;
 
        intel_pxp_tee_component_fini(pxp);
index 73847e535cab7d24a18489ef78f306b0523abd87..2da309088c6dad80519f052542767687f76afe5b 100644 (file)
@@ -12,7 +12,6 @@
 struct intel_pxp;
 struct drm_i915_gem_object;
 
-#ifdef CONFIG_DRM_I915_PXP
 struct intel_gt *pxp_to_gt(const struct intel_pxp *pxp);
 bool intel_pxp_is_enabled(const struct intel_pxp *pxp);
 bool intel_pxp_is_active(const struct intel_pxp *pxp);
@@ -32,36 +31,5 @@ int intel_pxp_key_check(struct intel_pxp *pxp,
                        bool assign);
 
 void intel_pxp_invalidate(struct intel_pxp *pxp);
-#else
-static inline void intel_pxp_init(struct intel_pxp *pxp)
-{
-}
-
-static inline void intel_pxp_fini(struct intel_pxp *pxp)
-{
-}
-
-static inline int intel_pxp_start(struct intel_pxp *pxp)
-{
-       return -ENODEV;
-}
-
-static inline bool intel_pxp_is_enabled(const struct intel_pxp *pxp)
-{
-       return false;
-}
-
-static inline bool intel_pxp_is_active(const struct intel_pxp *pxp)
-{
-       return false;
-}
-
-static inline int intel_pxp_key_check(struct intel_pxp *pxp,
-                                     struct drm_i915_gem_object *obj,
-                                     bool assign)
-{
-       return -ENODEV;
-}
-#endif
 
 #endif /* __INTEL_PXP_H__ */
diff --git a/drivers/gpu/drm/i915/pxp/intel_pxp_huc.c b/drivers/gpu/drm/i915/pxp/intel_pxp_huc.c
new file mode 100644 (file)
index 0000000..7ec36d9
--- /dev/null
@@ -0,0 +1,69 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright(c) 2021-2022, Intel Corporation. All rights reserved.
+ */
+
+#include "drm/i915_drm.h"
+#include "i915_drv.h"
+
+#include "gem/i915_gem_region.h"
+#include "gt/intel_gt.h"
+
+#include "intel_pxp.h"
+#include "intel_pxp_huc.h"
+#include "intel_pxp_tee.h"
+#include "intel_pxp_types.h"
+#include "intel_pxp_tee_interface.h"
+
+int intel_pxp_huc_load_and_auth(struct intel_pxp *pxp)
+{
+       struct intel_gt *gt = pxp_to_gt(pxp);
+       struct intel_huc *huc = &gt->uc.huc;
+       struct pxp_tee_start_huc_auth_in huc_in = {0};
+       struct pxp_tee_start_huc_auth_out huc_out = {0};
+       dma_addr_t huc_phys_addr;
+       u8 client_id = 0;
+       u8 fence_id = 0;
+       int err;
+
+       if (!pxp->pxp_component)
+               return -ENODEV;
+
+       huc_phys_addr = i915_gem_object_get_dma_address(huc->fw.obj, 0);
+
+       /* write the PXP message into the lmem (the sg list) */
+       huc_in.header.api_version = PXP_TEE_43_APIVER;
+       huc_in.header.command_id  = PXP_TEE_43_START_HUC_AUTH;
+       huc_in.header.status      = 0;
+       huc_in.header.buffer_len  = sizeof(huc_in.huc_base_address);
+       huc_in.huc_base_address   = huc_phys_addr;
+
+       err = intel_pxp_tee_stream_message(pxp, client_id, fence_id,
+                                          &huc_in, sizeof(huc_in),
+                                          &huc_out, sizeof(huc_out));
+       if (err < 0) {
+               drm_err(&gt->i915->drm,
+                       "Failed to send HuC load and auth command to GSC [%d]!\n",
+                       err);
+               return err;
+       }
+
+       /*
+        * HuC does sometimes survive suspend/resume (it depends on how "deep"
+        * a sleep state the device reaches) so we can end up here on resume
+        * with HuC already loaded, in which case the GSC will return
+        * PXP_STATUS_OP_NOT_PERMITTED. We can therefore consider the HuC
+        * correctly transferred in this scenario; if the same error is ever
+        * returned with HuC not loaded we'll still catch it when we check the
+        * authentication bit later.
+        */
+       if (huc_out.header.status != PXP_STATUS_SUCCESS &&
+           huc_out.header.status != PXP_STATUS_OP_NOT_PERMITTED) {
+               drm_err(&gt->i915->drm,
+                       "HuC load failed with GSC error = 0x%x\n",
+                       huc_out.header.status);
+               return -EPROTO;
+       }
+
+       return 0;
+}
diff --git a/drivers/gpu/drm/i915/pxp/intel_pxp_huc.h b/drivers/gpu/drm/i915/pxp/intel_pxp_huc.h
new file mode 100644 (file)
index 0000000..e40847a
--- /dev/null
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright(c) 2021-2022, Intel Corporation. All rights reserved.
+ */
+
+#ifndef __INTEL_PXP_HUC_H__
+#define __INTEL_PXP_HUC_H__
+
+struct intel_pxp;
+
+int intel_pxp_huc_load_and_auth(struct intel_pxp *pxp);
+
+#endif /* __INTEL_PXP_HUC_H__ */
index 8b5793654844282b0fd0c04d32365617096348aa..8c292dc86f68ec0cf567f4aa1cbe5d0b36dee8ce 100644 (file)
@@ -27,6 +27,14 @@ void intel_pxp_irq_handler(struct intel_pxp *pxp, u16 iir);
 static inline void intel_pxp_irq_handler(struct intel_pxp *pxp, u16 iir)
 {
 }
+
+static inline void intel_pxp_irq_enable(struct intel_pxp *pxp)
+{
+}
+
+static inline void intel_pxp_irq_disable(struct intel_pxp *pxp)
+{
+}
 #endif
 
 #endif /* __INTEL_PXP_IRQ_H__ */
index c4f5c994ca51217c9e9f2ce10701eb7ff12e8ebc..85572360c71a991a1432f227163ffce6d69f126c 100644 (file)
@@ -138,7 +138,7 @@ static void pxp_terminate_complete(struct intel_pxp *pxp)
        complete_all(&pxp->termination);
 }
 
-void intel_pxp_session_work(struct work_struct *work)
+static void pxp_session_work(struct work_struct *work)
 {
        struct intel_pxp *pxp = container_of(work, typeof(*pxp), session_work);
        struct intel_gt *gt = pxp_to_gt(pxp);
@@ -173,3 +173,9 @@ void intel_pxp_session_work(struct work_struct *work)
 
        intel_runtime_pm_put(gt->uncore->rpm, wakeref);
 }
+
+void intel_pxp_session_management_init(struct intel_pxp *pxp)
+{
+       mutex_init(&pxp->arb_mutex);
+       INIT_WORK(&pxp->session_work, pxp_session_work);
+}
index ba4c9d2b94b77521e9ff30a5428eb7130f2b2450..903ac52cffa1ae78cbf353dd9e4e087f16800c3d 100644 (file)
@@ -8,8 +8,13 @@
 
 #include <linux/types.h>
 
-struct work_struct;
-
-void intel_pxp_session_work(struct work_struct *work);
+struct intel_pxp;
 
+#ifdef CONFIG_DRM_I915_PXP
+void intel_pxp_session_management_init(struct intel_pxp *pxp);
+#else
+static inline void intel_pxp_session_management_init(struct intel_pxp *pxp)
+{
+}
+#endif
 #endif /* __INTEL_PXP_SESSION_H__ */
index a90905039216c76abe0b21289eed7a55537cda12..052fd2f9a583291b68c7d69fb8e5b41f794e29ca 100644 (file)
@@ -8,11 +8,14 @@
 #include <drm/i915_pxp_tee_interface.h>
 #include <drm/i915_component.h>
 
+#include "gem/i915_gem_lmem.h"
+
 #include "i915_drv.h"
 #include "intel_pxp.h"
 #include "intel_pxp_session.h"
 #include "intel_pxp_tee.h"
 #include "intel_pxp_tee_interface.h"
+#include "intel_pxp_huc.h"
 
 static inline struct intel_pxp *i915_dev_to_pxp(struct device *i915_kdev)
 {
@@ -69,6 +72,47 @@ unlock:
        return ret;
 }
 
+int intel_pxp_tee_stream_message(struct intel_pxp *pxp,
+                                u8 client_id, u32 fence_id,
+                                void *msg_in, size_t msg_in_len,
+                                void *msg_out, size_t msg_out_len)
+{
+       /* TODO: for bigger objects we need to use an sg list of 4k pages */
+       const size_t max_msg_size = PAGE_SIZE;
+       struct drm_i915_private *i915 = pxp_to_gt(pxp)->i915;
+       struct i915_pxp_component *pxp_component = pxp->pxp_component;
+       unsigned int offset = 0;
+       struct scatterlist *sg;
+       int ret;
+
+       if (msg_in_len > max_msg_size || msg_out_len > max_msg_size)
+               return -ENOSPC;
+
+       mutex_lock(&pxp->tee_mutex);
+
+       if (unlikely(!pxp_component || !pxp_component->ops->gsc_command)) {
+               ret = -ENODEV;
+               goto unlock;
+       }
+
+       GEM_BUG_ON(!pxp->stream_cmd.obj);
+
+       sg = i915_gem_object_get_sg_dma(pxp->stream_cmd.obj, 0, &offset);
+
+       memcpy(pxp->stream_cmd.vaddr, msg_in, msg_in_len);
+
+       ret = pxp_component->ops->gsc_command(pxp_component->tee_dev, client_id,
+                                             fence_id, sg, msg_in_len, sg);
+       if (ret < 0)
+               drm_err(&i915->drm, "Failed to send PXP TEE gsc command\n");
+       else
+               memcpy(msg_out, pxp->stream_cmd.vaddr, msg_out_len);
+
+unlock:
+       mutex_unlock(&pxp->tee_mutex);
+       return ret;
+}
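
A hedged usage sketch of the new streaming helper, mirroring the HuC-auth command added in intel_pxp_huc.c earlier in this merge (the wrapper function itself is illustrative):

/* Illustrative only: send a small in/out PXP message over the GSC stream
 * channel; the helper bounces it through the pinned stream_cmd LMEM page. */
static int sketch_send_huc_auth(struct intel_pxp *pxp, u64 huc_base)
{
        struct pxp_tee_start_huc_auth_in msg_in = {};
        struct pxp_tee_start_huc_auth_out msg_out = {};

        msg_in.header.api_version = PXP_TEE_43_APIVER;
        msg_in.header.command_id  = PXP_TEE_43_START_HUC_AUTH;
        msg_in.header.buffer_len  = sizeof(msg_in.huc_base_address);
        msg_in.huc_base_address   = huc_base;

        return intel_pxp_tee_stream_message(pxp, 0 /* client_id */, 0 /* fence_id */,
                                            &msg_in, sizeof(msg_in),
                                            &msg_out, sizeof(msg_out));
}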
+
 /**
  * i915_pxp_tee_component_bind - bind function to pass the function pointers to pxp_tee
  * @i915_kdev: pointer to i915 kernel device
@@ -84,24 +128,36 @@ static int i915_pxp_tee_component_bind(struct device *i915_kdev,
 {
        struct drm_i915_private *i915 = kdev_to_i915(i915_kdev);
        struct intel_pxp *pxp = i915_dev_to_pxp(i915_kdev);
+       struct intel_uc *uc = &pxp_to_gt(pxp)->uc;
        intel_wakeref_t wakeref;
+       int ret = 0;
 
        mutex_lock(&pxp->tee_mutex);
        pxp->pxp_component = data;
        pxp->pxp_component->tee_dev = tee_kdev;
        mutex_unlock(&pxp->tee_mutex);
 
+       if (intel_uc_uses_huc(uc) && intel_huc_is_loaded_by_gsc(&uc->huc)) {
+               with_intel_runtime_pm(&i915->runtime_pm, wakeref) {
+                       /* load huc via pxp */
+                       ret = intel_huc_fw_load_and_auth_via_gsc(&uc->huc);
+                       if (ret < 0)
+                               drm_err(&i915->drm, "failed to load huc via gsc %d\n", ret);
+               }
+       }
+
        /* if we are suspended, the HW will be re-initialized on resume */
        wakeref = intel_runtime_pm_get_if_in_use(&i915->runtime_pm);
        if (!wakeref)
                return 0;
 
        /* the component is required to fully start the PXP HW */
-       intel_pxp_init_hw(pxp);
+       if (intel_pxp_is_enabled(pxp))
+               intel_pxp_init_hw(pxp);
 
        intel_runtime_pm_put(&i915->runtime_pm, wakeref);
 
-       return 0;
+       return ret;
 }
 
 static void i915_pxp_tee_component_unbind(struct device *i915_kdev,
@@ -111,8 +167,9 @@ static void i915_pxp_tee_component_unbind(struct device *i915_kdev,
        struct intel_pxp *pxp = i915_dev_to_pxp(i915_kdev);
        intel_wakeref_t wakeref;
 
-       with_intel_runtime_pm_if_in_use(&i915->runtime_pm, wakeref)
-               intel_pxp_fini_hw(pxp);
+       if (intel_pxp_is_enabled(pxp))
+               with_intel_runtime_pm_if_in_use(&i915->runtime_pm, wakeref)
+                       intel_pxp_fini_hw(pxp);
 
        mutex_lock(&pxp->tee_mutex);
        pxp->pxp_component = NULL;
@@ -124,22 +181,92 @@ static const struct component_ops i915_pxp_tee_component_ops = {
        .unbind = i915_pxp_tee_component_unbind,
 };
 
+static int alloc_streaming_command(struct intel_pxp *pxp)
+{
+       struct drm_i915_private *i915 = pxp_to_gt(pxp)->i915;
+       struct drm_i915_gem_object *obj = NULL;
+       void *cmd;
+       int err;
+
+       pxp->stream_cmd.obj = NULL;
+       pxp->stream_cmd.vaddr = NULL;
+
+       if (!IS_DGFX(i915))
+               return 0;
+
+       /* allocate lmem object of one page for PXP command memory and store it */
+       obj = i915_gem_object_create_lmem(i915, PAGE_SIZE, I915_BO_ALLOC_CONTIGUOUS);
+       if (IS_ERR(obj)) {
+               drm_err(&i915->drm, "Failed to allocate pxp streaming command!\n");
+               return PTR_ERR(obj);
+       }
+
+       err = i915_gem_object_pin_pages_unlocked(obj);
+       if (err) {
+               drm_err(&i915->drm, "Failed to pin gsc message page!\n");
+               goto out_put;
+       }
+
+       /* map the lmem object to a CPU-accessible virtual address */
+       cmd = i915_gem_object_pin_map_unlocked(obj, i915_coherent_map_type(i915, obj, true));
+       if (IS_ERR(cmd)) {
+               drm_err(&i915->drm, "Failed to map gsc message page!\n");
+               err = PTR_ERR(cmd);
+               goto out_unpin;
+       }
+
+       memset(cmd, 0, obj->base.size);
+
+       pxp->stream_cmd.obj = obj;
+       pxp->stream_cmd.vaddr = cmd;
+
+       return 0;
+
+out_unpin:
+       i915_gem_object_unpin_pages(obj);
+out_put:
+       i915_gem_object_put(obj);
+       return err;
+}
+
+static void free_streaming_command(struct intel_pxp *pxp)
+{
+       struct drm_i915_gem_object *obj = fetch_and_zero(&pxp->stream_cmd.obj);
+
+       if (!obj)
+               return;
+
+       i915_gem_object_unpin_map(obj);
+       i915_gem_object_unpin_pages(obj);
+       i915_gem_object_put(obj);
+}
+
 int intel_pxp_tee_component_init(struct intel_pxp *pxp)
 {
        int ret;
        struct intel_gt *gt = pxp_to_gt(pxp);
        struct drm_i915_private *i915 = gt->i915;
 
+       mutex_init(&pxp->tee_mutex);
+
+       ret = alloc_streaming_command(pxp);
+       if (ret)
+               return ret;
+
        ret = component_add_typed(i915->drm.dev, &i915_pxp_tee_component_ops,
                                  I915_COMPONENT_PXP);
        if (ret < 0) {
                drm_err(&i915->drm, "Failed to add PXP component (%d)\n", ret);
-               return ret;
+               goto out_free;
        }
 
        pxp->pxp_component_added = true;
 
        return 0;
+
+out_free:
+       free_streaming_command(pxp);
+       return ret;
 }
 
 void intel_pxp_tee_component_fini(struct intel_pxp *pxp)
@@ -151,6 +278,8 @@ void intel_pxp_tee_component_fini(struct intel_pxp *pxp)
 
        component_del(i915->drm.dev, &i915_pxp_tee_component_ops);
        pxp->pxp_component_added = false;
+
+       free_streaming_command(pxp);
 }
 
 int intel_pxp_tee_cmd_create_arb_session(struct intel_pxp *pxp,
index c136053ce3406535761e4e9448dc6eef292e1fba..aeb3dfe7ce960ff1c659d1bafd698db669c27d85 100644 (file)
@@ -14,4 +14,9 @@ void intel_pxp_tee_component_fini(struct intel_pxp *pxp);
 int intel_pxp_tee_cmd_create_arb_session(struct intel_pxp *pxp,
                                         int arb_session_id);
 
+int intel_pxp_tee_stream_message(struct intel_pxp *pxp,
+                                u8 client_id, u32 fence_id,
+                                void *msg_in, size_t msg_in_len,
+                                void *msg_out, size_t msg_out_len);
+
 #endif /* __INTEL_PXP_TEE_H__ */
index 36e9b0868f5c1e32ad580278fefc62a26875061b..7edc1760f14201b3b6bc500fbbe2f1591f91d3bc 100644 (file)
@@ -1,6 +1,6 @@
 /* SPDX-License-Identifier: MIT */
 /*
- * Copyright(c) 2020, Intel Corporation. All rights reserved.
+ * Copyright(c) 2020-2022, Intel Corporation. All rights reserved.
  */
 
 #ifndef __INTEL_PXP_TEE_INTERFACE_H__
@@ -9,8 +9,20 @@
 #include <linux/types.h>
 
 #define PXP_TEE_APIVER 0x40002
+#define PXP_TEE_43_APIVER 0x00040003
 #define PXP_TEE_ARB_CMDID 0x1e
 #define PXP_TEE_ARB_PROTECTION_MODE 0x2
+#define PXP_TEE_43_START_HUC_AUTH   0x0000003A
+
+/*
+ * There are a lot of status codes for PXP, but we only define the ones we
+ * can actually handle in the driver; other failure codes are only printed
+ * in an error message for debugging.
+ */
+enum pxp_status {
+       PXP_STATUS_SUCCESS = 0x0,
+       PXP_STATUS_OP_NOT_PERMITTED = 0x4013
+};
 
 /* PXP TEE message header */
 struct pxp_tee_cmd_header {
@@ -33,4 +45,13 @@ struct pxp_tee_create_arb_out {
        struct pxp_tee_cmd_header header;
 } __packed;
 
+struct pxp_tee_start_huc_auth_in {
+       struct pxp_tee_cmd_header header;
+       __le64                    huc_base_address;
+};
+
+struct pxp_tee_start_huc_auth_out {
+       struct pxp_tee_cmd_header header;
+};
+
 #endif /* __INTEL_PXP_TEE_INTERFACE_H__ */
index 7ce5f37ee12e12e8eff87338dca05b02bd837dbe..f74b1e11a505f8d1416418f94d82349bd38c0db0 100644 (file)
@@ -53,6 +53,12 @@ struct intel_pxp {
        /** @tee_mutex: protects the tee channel binding and messaging. */
        struct mutex tee_mutex;
 
+       /** @stream_cmd: LMEM obj used to send stream PXP commands to the GSC */
+       struct {
+               struct drm_i915_gem_object *obj; /* contains PXP command memory */
+               void *vaddr; /* virtual memory for PXP command */
+       } stream_cmd;
+
        /**
         * @hw_state_invalidated: if the HW perceives an attack on the integrity
         * of the encryption it will invalidate the keys and expect SW to
index e050a2de5fd1df8fc4397f12d23256f1fbd75ac0..27c733b0097628bcecbc6f4ebc2f1ae7f688492c 100644 (file)
@@ -27,6 +27,7 @@
 
 #include "gem/i915_gem_context.h"
 #include "gem/i915_gem_internal.h"
+#include "gem/i915_gem_lmem.h"
 #include "gem/i915_gem_region.h"
 #include "gem/selftests/mock_context.h"
 #include "gt/intel_context.h"
@@ -1113,15 +1114,8 @@ static int misaligned_case(struct i915_address_space *vm, struct intel_memory_re
        expected_node_size = expected_vma_size;
 
        if (HAS_64K_PAGES(vm->i915) && i915_gem_object_is_lmem(obj)) {
-               /*
-                * The compact-pt should expand lmem node to 2MB for the ppGTT,
-                * for all other cases we should only expect 64K.
-                */
                expected_vma_size = round_up(size, I915_GTT_PAGE_SIZE_64K);
-               if (NEEDS_COMPACT_PT(vm->i915) && !i915_is_ggtt(vm))
-                       expected_node_size = round_up(size, I915_GTT_PAGE_SIZE_2M);
-               else
-                       expected_node_size = round_up(size, I915_GTT_PAGE_SIZE_64K);
+               expected_node_size = round_up(size, I915_GTT_PAGE_SIZE_64K);
        }
 
        if (vma->size != expected_vma_size || vma->node.size != expected_node_size) {
index 429c6d73b159c330250defd57385e986b45424a1..24dde55314230f2c721b907b2479b66b86212dca 100644 (file)
@@ -102,6 +102,12 @@ test_stream(struct i915_perf *perf)
                I915_OA_FORMAT_A32u40_A4u32_B8_C8 : I915_OA_FORMAT_C4_B8,
        };
        struct i915_perf_stream *stream;
+       struct intel_gt *gt;
+
+       if (!props.engine)
+               return NULL;
+
+       gt = props.engine->gt;
 
        if (!oa_config)
                return NULL;
@@ -116,12 +122,12 @@ test_stream(struct i915_perf *perf)
 
        stream->perf = perf;
 
-       mutex_lock(&perf->lock);
+       mutex_lock(&gt->perf.lock);
        if (i915_oa_stream_init(stream, &param, &props)) {
                kfree(stream);
                stream =  NULL;
        }
-       mutex_unlock(&perf->lock);
+       mutex_unlock(&gt->perf.lock);
 
        i915_oa_config_put(oa_config);
 
@@ -130,11 +136,11 @@ test_stream(struct i915_perf *perf)
 
 static void stream_destroy(struct i915_perf_stream *stream)
 {
-       struct i915_perf *perf = stream->perf;
+       struct intel_gt *gt = stream->engine->gt;
 
-       mutex_lock(&perf->lock);
+       mutex_lock(&gt->perf.lock);
        i915_perf_destroy_locked(stream);
-       mutex_unlock(&perf->lock);
+       mutex_unlock(&gt->perf.lock);
 }
 
 static int live_sanitycheck(void *arg)
index 818a4909c1f354242076d9f1859cfe7931134fe8..a46350c37e9d495288b57f554d26b1401f5b57ee 100644 (file)
@@ -299,9 +299,18 @@ __live_request_alloc(struct intel_context *ce)
        return intel_context_create_request(ce);
 }
 
-static int __igt_breadcrumbs_smoketest(void *arg)
+struct smoke_thread {
+       struct kthread_worker *worker;
+       struct kthread_work work;
+       struct smoketest *t;
+       bool stop;
+       int result;
+};
+
+static void __igt_breadcrumbs_smoketest(struct kthread_work *work)
 {
-       struct smoketest *t = arg;
+       struct smoke_thread *thread = container_of(work, typeof(*thread), work);
+       struct smoketest *t = thread->t;
        const unsigned int max_batch = min(t->ncontexts, t->max_batch) - 1;
        const unsigned int total = 4 * t->ncontexts + 1;
        unsigned int num_waits = 0, num_fences = 0;
@@ -320,8 +329,10 @@ static int __igt_breadcrumbs_smoketest(void *arg)
         */
 
        requests = kcalloc(total, sizeof(*requests), GFP_KERNEL);
-       if (!requests)
-               return -ENOMEM;
+       if (!requests) {
+               thread->result = -ENOMEM;
+               return;
+       }
 
        order = i915_random_order(total, &prng);
        if (!order) {
@@ -329,7 +340,7 @@ static int __igt_breadcrumbs_smoketest(void *arg)
                goto out_requests;
        }
 
-       while (!kthread_should_stop()) {
+       while (!READ_ONCE(thread->stop)) {
                struct i915_sw_fence *submit, *wait;
                unsigned int n, count;
 
@@ -437,7 +448,7 @@ static int __igt_breadcrumbs_smoketest(void *arg)
        kfree(order);
 out_requests:
        kfree(requests);
-       return err;
+       thread->result = err;
 }
 
 static int mock_breadcrumbs_smoketest(void *arg)
@@ -450,7 +461,7 @@ static int mock_breadcrumbs_smoketest(void *arg)
                .request_alloc = __mock_request_alloc
        };
        unsigned int ncpus = num_online_cpus();
-       struct task_struct **threads;
+       struct smoke_thread *threads;
        unsigned int n;
        int ret = 0;
 
@@ -479,28 +490,37 @@ static int mock_breadcrumbs_smoketest(void *arg)
        }
 
        for (n = 0; n < ncpus; n++) {
-               threads[n] = kthread_run(__igt_breadcrumbs_smoketest,
-                                        &t, "igt/%d", n);
-               if (IS_ERR(threads[n])) {
-                       ret = PTR_ERR(threads[n]);
+               struct kthread_worker *worker;
+
+               worker = kthread_create_worker(0, "igt/%d", n);
+               if (IS_ERR(worker)) {
+                       ret = PTR_ERR(worker);
                        ncpus = n;
                        break;
                }
 
-               get_task_struct(threads[n]);
+               threads[n].worker = worker;
+               threads[n].t = &t;
+               threads[n].stop = false;
+               threads[n].result = 0;
+
+               kthread_init_work(&threads[n].work,
+                                 __igt_breadcrumbs_smoketest);
+               kthread_queue_work(worker, &threads[n].work);
        }
 
-       yield(); /* start all threads before we begin */
        msleep(jiffies_to_msecs(i915_selftest.timeout_jiffies));
 
        for (n = 0; n < ncpus; n++) {
                int err;
 
-               err = kthread_stop(threads[n]);
+               WRITE_ONCE(threads[n].stop, true);
+               kthread_flush_work(&threads[n].work);
+               err = READ_ONCE(threads[n].result);
                if (err < 0 && !ret)
                        ret = err;
 
-               put_task_struct(threads[n]);
+               kthread_destroy_worker(threads[n].worker);
        }
        pr_info("Completed %lu waits for %lu fence across %d cpus\n",
                atomic_long_read(&t.num_waits),
@@ -1419,9 +1439,18 @@ out_free:
        return err;
 }
 
-static int __live_parallel_engine1(void *arg)
+struct parallel_thread {
+       struct kthread_worker *worker;
+       struct kthread_work work;
+       struct intel_engine_cs *engine;
+       int result;
+};
+
+static void __live_parallel_engine1(struct kthread_work *work)
 {
-       struct intel_engine_cs *engine = arg;
+       struct parallel_thread *thread =
+               container_of(work, typeof(*thread), work);
+       struct intel_engine_cs *engine = thread->engine;
        IGT_TIMEOUT(end_time);
        unsigned long count;
        int err = 0;
@@ -1452,12 +1481,14 @@ static int __live_parallel_engine1(void *arg)
        intel_engine_pm_put(engine);
 
        pr_info("%s: %lu request + sync\n", engine->name, count);
-       return err;
+       thread->result = err;
 }
 
-static int __live_parallel_engineN(void *arg)
+static void __live_parallel_engineN(struct kthread_work *work)
 {
-       struct intel_engine_cs *engine = arg;
+       struct parallel_thread *thread =
+               container_of(work, typeof(*thread), work);
+       struct intel_engine_cs *engine = thread->engine;
        IGT_TIMEOUT(end_time);
        unsigned long count;
        int err = 0;
@@ -1479,7 +1510,7 @@ static int __live_parallel_engineN(void *arg)
        intel_engine_pm_put(engine);
 
        pr_info("%s: %lu requests\n", engine->name, count);
-       return err;
+       thread->result = err;
 }
 
 static bool wake_all(struct drm_i915_private *i915)
@@ -1505,9 +1536,11 @@ static int wait_for_all(struct drm_i915_private *i915)
        return -ETIME;
 }
 
-static int __live_parallel_spin(void *arg)
+static void __live_parallel_spin(struct kthread_work *work)
 {
-       struct intel_engine_cs *engine = arg;
+       struct parallel_thread *thread =
+               container_of(work, typeof(*thread), work);
+       struct intel_engine_cs *engine = thread->engine;
        struct igt_spinner spin;
        struct i915_request *rq;
        int err = 0;
@@ -1520,7 +1553,8 @@ static int __live_parallel_spin(void *arg)
 
        if (igt_spinner_init(&spin, engine->gt)) {
                wake_all(engine->i915);
-               return -ENOMEM;
+               thread->result = -ENOMEM;
+               return;
        }
 
        intel_engine_pm_get(engine);
@@ -1553,22 +1587,22 @@ static int __live_parallel_spin(void *arg)
 
 out_spin:
        igt_spinner_fini(&spin);
-       return err;
+       thread->result = err;
 }
 
 static int live_parallel_engines(void *arg)
 {
        struct drm_i915_private *i915 = arg;
-       static int (* const func[])(void *arg) = {
+       static void (* const func[])(struct kthread_work *) = {
                __live_parallel_engine1,
                __live_parallel_engineN,
                __live_parallel_spin,
                NULL,
        };
        const unsigned int nengines = num_uabi_engines(i915);
+       struct parallel_thread *threads;
        struct intel_engine_cs *engine;
-       int (* const *fn)(void *arg);
-       struct task_struct **tsk;
+       void (* const *fn)(struct kthread_work *);
        int err = 0;
 
        /*
@@ -1576,8 +1610,8 @@ static int live_parallel_engines(void *arg)
         * tests that we load up the system maximally.
         */
 
-       tsk = kcalloc(nengines, sizeof(*tsk), GFP_KERNEL);
-       if (!tsk)
+       threads = kcalloc(nengines, sizeof(*threads), GFP_KERNEL);
+       if (!threads)
                return -ENOMEM;
 
        for (fn = func; !err && *fn; fn++) {
@@ -1594,37 +1628,44 @@ static int live_parallel_engines(void *arg)
 
                idx = 0;
                for_each_uabi_engine(engine, i915) {
-                       tsk[idx] = kthread_run(*fn, engine,
-                                              "igt/parallel:%s",
-                                              engine->name);
-                       if (IS_ERR(tsk[idx])) {
-                               err = PTR_ERR(tsk[idx]);
+                       struct kthread_worker *worker;
+
+                       worker = kthread_create_worker(0, "igt/parallel:%s",
+                                                      engine->name);
+                       if (IS_ERR(worker)) {
+                               err = PTR_ERR(worker);
                                break;
                        }
-                       get_task_struct(tsk[idx++]);
-               }
 
-               yield(); /* start all threads before we kthread_stop() */
+                       threads[idx].worker = worker;
+                       threads[idx].result = 0;
+                       threads[idx].engine = engine;
+
+                       kthread_init_work(&threads[idx].work, *fn);
+                       kthread_queue_work(worker, &threads[idx].work);
+                       idx++;
+               }
 
                idx = 0;
                for_each_uabi_engine(engine, i915) {
                        int status;
 
-                       if (IS_ERR(tsk[idx]))
+                       if (!threads[idx].worker)
                                break;
 
-                       status = kthread_stop(tsk[idx]);
+                       kthread_flush_work(&threads[idx].work);
+                       status = READ_ONCE(threads[idx].result);
                        if (status && !err)
                                err = status;
 
-                       put_task_struct(tsk[idx++]);
+                       kthread_destroy_worker(threads[idx++].worker);
                }
 
                if (igt_live_test_end(&t))
                        err = -EIO;
        }
 
-       kfree(tsk);
+       kfree(threads);
        return err;
 }
 
@@ -1672,7 +1713,7 @@ static int live_breadcrumbs_smoketest(void *arg)
        const unsigned int ncpus = num_online_cpus();
        unsigned long num_waits, num_fences;
        struct intel_engine_cs *engine;
-       struct task_struct **threads;
+       struct smoke_thread *threads;
        struct igt_live_test live;
        intel_wakeref_t wakeref;
        struct smoketest *smoke;
@@ -1746,23 +1787,26 @@ static int live_breadcrumbs_smoketest(void *arg)
                         smoke[idx].max_batch, engine->name);
 
                for (n = 0; n < ncpus; n++) {
-                       struct task_struct *tsk;
+                       unsigned int i = idx * ncpus + n;
+                       struct kthread_worker *worker;
 
-                       tsk = kthread_run(__igt_breadcrumbs_smoketest,
-                                         &smoke[idx], "igt/%d.%d", idx, n);
-                       if (IS_ERR(tsk)) {
-                               ret = PTR_ERR(tsk);
+                       worker = kthread_create_worker(0, "igt/%d.%d", idx, n);
+                       if (IS_ERR(worker)) {
+                               ret = PTR_ERR(worker);
                                goto out_flush;
                        }
 
-                       get_task_struct(tsk);
-                       threads[idx * ncpus + n] = tsk;
+                       threads[i].worker = worker;
+                       threads[i].t = &smoke[idx];
+
+                       kthread_init_work(&threads[i].work,
+                                         __igt_breadcrumbs_smoketest);
+                       kthread_queue_work(worker, &threads[i].work);
                }
 
                idx++;
        }
 
-       yield(); /* start all threads before we begin */
        msleep(jiffies_to_msecs(i915_selftest.timeout_jiffies));
 
 out_flush:
@@ -1771,17 +1815,19 @@ out_flush:
        num_fences = 0;
        for_each_uabi_engine(engine, i915) {
                for (n = 0; n < ncpus; n++) {
-                       struct task_struct *tsk = threads[idx * ncpus + n];
+                       unsigned int i = idx * ncpus + n;
                        int err;
 
-                       if (!tsk)
+                       if (!threads[i].worker)
                                continue;
 
-                       err = kthread_stop(tsk);
+                       WRITE_ONCE(threads[i].stop, true);
+                       kthread_flush_work(&threads[i].work);
+                       err = READ_ONCE(threads[i].result);
                        if (err < 0 && !ret)
                                ret = err;
 
-                       put_task_struct(tsk);
+                       kthread_destroy_worker(threads[i].worker);
                }
 
                num_waits += atomic_long_read(&smoke[idx].num_waits);
@@ -2891,9 +2937,18 @@ out:
        return err;
 }
 
-static int p_sync0(void *arg)
+struct p_thread {
+       struct perf_stats p;
+       struct kthread_worker *worker;
+       struct kthread_work work;
+       struct intel_engine_cs *engine;
+       int result;
+};
+
+static void p_sync0(struct kthread_work *work)
 {
-       struct perf_stats *p = arg;
+       struct p_thread *thread = container_of(work, typeof(*thread), work);
+       struct perf_stats *p = &thread->p;
        struct intel_engine_cs *engine = p->engine;
        struct intel_context *ce;
        IGT_TIMEOUT(end_time);
@@ -2902,13 +2957,16 @@ static int p_sync0(void *arg)
        int err = 0;
 
        ce = intel_context_create(engine);
-       if (IS_ERR(ce))
-               return PTR_ERR(ce);
+       if (IS_ERR(ce)) {
+               thread->result = PTR_ERR(ce);
+               return;
+       }
 
        err = intel_context_pin(ce);
        if (err) {
                intel_context_put(ce);
-               return err;
+               thread->result = err;
+               return;
        }
 
        if (intel_engine_supports_stats(engine)) {
@@ -2958,12 +3016,13 @@ static int p_sync0(void *arg)
 
        intel_context_unpin(ce);
        intel_context_put(ce);
-       return err;
+       thread->result = err;
 }
 
-static int p_sync1(void *arg)
+static void p_sync1(struct kthread_work *work)
 {
-       struct perf_stats *p = arg;
+       struct p_thread *thread = container_of(work, typeof(*thread), work);
+       struct perf_stats *p = &thread->p;
        struct intel_engine_cs *engine = p->engine;
        struct i915_request *prev = NULL;
        struct intel_context *ce;
@@ -2973,13 +3032,16 @@ static int p_sync1(void *arg)
        int err = 0;
 
        ce = intel_context_create(engine);
-       if (IS_ERR(ce))
-               return PTR_ERR(ce);
+       if (IS_ERR(ce)) {
+               thread->result = PTR_ERR(ce);
+               return;
+       }
 
        err = intel_context_pin(ce);
        if (err) {
                intel_context_put(ce);
-               return err;
+               thread->result = err;
+               return;
        }
 
        if (intel_engine_supports_stats(engine)) {
@@ -3031,12 +3093,13 @@ static int p_sync1(void *arg)
 
        intel_context_unpin(ce);
        intel_context_put(ce);
-       return err;
+       thread->result = err;
 }
 
-static int p_many(void *arg)
+static void p_many(struct kthread_work *work)
 {
-       struct perf_stats *p = arg;
+       struct p_thread *thread = container_of(work, typeof(*thread), work);
+       struct perf_stats *p = &thread->p;
        struct intel_engine_cs *engine = p->engine;
        struct intel_context *ce;
        IGT_TIMEOUT(end_time);
@@ -3045,13 +3108,16 @@ static int p_many(void *arg)
        bool busy;
 
        ce = intel_context_create(engine);
-       if (IS_ERR(ce))
-               return PTR_ERR(ce);
+       if (IS_ERR(ce)) {
+               thread->result = PTR_ERR(ce);
+               return;
+       }
 
        err = intel_context_pin(ce);
        if (err) {
                intel_context_put(ce);
-               return err;
+               thread->result = err;
+               return;
        }
 
        if (intel_engine_supports_stats(engine)) {
@@ -3092,26 +3158,23 @@ static int p_many(void *arg)
 
        intel_context_unpin(ce);
        intel_context_put(ce);
-       return err;
+       thread->result = err;
 }
 
 static int perf_parallel_engines(void *arg)
 {
        struct drm_i915_private *i915 = arg;
-       static int (* const func[])(void *arg) = {
+       static void (* const func[])(struct kthread_work *) = {
                p_sync0,
                p_sync1,
                p_many,
                NULL,
        };
        const unsigned int nengines = num_uabi_engines(i915);
+       void (* const *fn)(struct kthread_work *);
        struct intel_engine_cs *engine;
-       int (* const *fn)(void *arg);
        struct pm_qos_request qos;
-       struct {
-               struct perf_stats p;
-               struct task_struct *tsk;
-       } *engines;
+       struct p_thread *engines;
        int err = 0;
 
        engines = kcalloc(nengines, sizeof(*engines), GFP_KERNEL);
@@ -3134,36 +3197,45 @@ static int perf_parallel_engines(void *arg)
 
                idx = 0;
                for_each_uabi_engine(engine, i915) {
+                       struct kthread_worker *worker;
+
                        intel_engine_pm_get(engine);
 
                        memset(&engines[idx].p, 0, sizeof(engines[idx].p));
-                       engines[idx].p.engine = engine;
 
-                       engines[idx].tsk = kthread_run(*fn, &engines[idx].p,
-                                                      "igt:%s", engine->name);
-                       if (IS_ERR(engines[idx].tsk)) {
-                               err = PTR_ERR(engines[idx].tsk);
+                       worker = kthread_create_worker(0, "igt:%s",
+                                                      engine->name);
+                       if (IS_ERR(worker)) {
+                               err = PTR_ERR(worker);
                                intel_engine_pm_put(engine);
                                break;
                        }
-                       get_task_struct(engines[idx++].tsk);
-               }
+                       engines[idx].worker = worker;
+                       engines[idx].result = 0;
+                       engines[idx].p.engine = engine;
+                       engines[idx].engine = engine;
 
-               yield(); /* start all threads before we kthread_stop() */
+                       kthread_init_work(&engines[idx].work, *fn);
+                       kthread_queue_work(worker, &engines[idx].work);
+                       idx++;
+               }
 
                idx = 0;
                for_each_uabi_engine(engine, i915) {
                        int status;
 
-                       if (IS_ERR(engines[idx].tsk))
+                       if (!engines[idx].worker)
                                break;
 
-                       status = kthread_stop(engines[idx].tsk);
+                       kthread_flush_work(&engines[idx].work);
+                       status = READ_ONCE(engines[idx].result);
                        if (status && !err)
                                err = status;
 
                        intel_engine_pm_put(engine);
-                       put_task_struct(engines[idx++].tsk);
+
+                       kthread_destroy_worker(engines[idx].worker);
+                       idx++;
                }
 
                if (igt_live_test_end(&t))
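
All of the i915_request selftests above follow the same conversion: the kthread_run()/kthread_stop() pair becomes a kthread_worker plus a kthread_work embedded in a small per-thread struct, the work function recovers its context with container_of(), and status is handed back through a result field because a kthread_work callback cannot return a value. A minimal standalone sketch of the pattern (names are illustrative, not taken from the patch):

#include <linux/delay.h>
#include <linux/err.h>
#include <linux/kthread.h>
#include <linux/sched.h>

struct demo_thread {
        struct kthread_worker *worker;
        struct kthread_work work;
        bool stop;
        int result;
};

static void demo_work_fn(struct kthread_work *work)
{
        struct demo_thread *thread = container_of(work, typeof(*thread), work);
        int err = 0;

        while (!READ_ONCE(thread->stop)) {
                /* one iteration of the test loop; set err and break on failure */
                cond_resched();
        }

        thread->result = err;           /* a kthread_work cannot return a value */
}

static int demo_run(void)
{
        struct demo_thread t = {};
        int err;

        t.worker = kthread_create_worker(0, "igt/demo");
        if (IS_ERR(t.worker))
                return PTR_ERR(t.worker);

        kthread_init_work(&t.work, demo_work_fn);
        kthread_queue_work(t.worker, &t.work);

        msleep(10);                     /* let the work run for a while */

        WRITE_ONCE(t.stop, true);       /* ask the work to finish ... */
        kthread_flush_work(&t.work);    /* ... and wait until it has */
        err = READ_ONCE(t.result);

        kthread_destroy_worker(t.worker);
        return err;
}

Compared with the kthread_run() version, the explicit stop flag replaces kthread_should_stop(), kthread_flush_work() replaces the implicit join in kthread_stop(), and the old yield() calls are no longer needed since queued work runs as soon as the worker thread is scheduled.
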
index fda9bb79c049d5a97c8dbdcb7d592331b28deac0..e4281508d5808b5c511ca85c2b17941ff88687e8 100644 (file)
@@ -70,6 +70,8 @@ static int intel_shadow_table_check(void)
                { gen12_shadowed_regs, ARRAY_SIZE(gen12_shadowed_regs) },
                { dg2_shadowed_regs, ARRAY_SIZE(dg2_shadowed_regs) },
                { pvc_shadowed_regs, ARRAY_SIZE(pvc_shadowed_regs) },
+               { mtl_shadowed_regs, ARRAY_SIZE(mtl_shadowed_regs) },
+               { xelpmp_shadowed_regs, ARRAY_SIZE(xelpmp_shadowed_regs) },
        };
        const struct i915_range *range;
        unsigned int i, j;
@@ -117,6 +119,8 @@ int intel_uncore_mock_selftests(void)
                { __gen12_fw_ranges, ARRAY_SIZE(__gen12_fw_ranges), true },
                { __xehp_fw_ranges, ARRAY_SIZE(__xehp_fw_ranges), true },
                { __pvc_fw_ranges, ARRAY_SIZE(__pvc_fw_ranges), true },
+               { __mtl_fw_ranges, ARRAY_SIZE(__mtl_fw_ranges), true },
+               { __xelpmp_fw_ranges, ARRAY_SIZE(__xelpmp_fw_ranges), true },
        };
        int err, i;
 
index fff11c90f1fa599f444fc16edb9976e2e1271743..f6a7c0bd2955cf39493bda8bab0a9e060d967531 100644 (file)
@@ -67,7 +67,6 @@ static void mock_device_release(struct drm_device *dev)
        intel_gt_driver_remove(to_gt(i915));
 
        i915_gem_drain_workqueue(i915);
-       i915_gem_drain_freed_objects(i915);
 
        mock_fini_ggtt(to_gt(i915)->ggtt);
        destroy_workqueue(i915->wq);
index 46aa3554e97b02efc81325de811b6441e9fa831b..1fbe127ff63342cf13d16f6858eb2273a3895895 100644 (file)
@@ -13,6 +13,7 @@
 #include <linux/slab.h>
 #include <linux/mutex.h>
 #include <linux/interrupt.h>
+#include <linux/scatterlist.h>
 #include <linux/mei_cl_bus.h>
 
 #include "mei_dev.h"
@@ -100,9 +101,18 @@ ssize_t __mei_cl_send(struct mei_cl *cl, const u8 *buf, size_t length, u8 vtag,
        cb->internal = !!(mode & MEI_CL_IO_TX_INTERNAL);
        cb->blocking = !!(mode & MEI_CL_IO_TX_BLOCKING);
        memcpy(cb->buf.data, buf, length);
+       /* hack: for SGL mode the copied data is the extended header */
+       if (mode & MEI_CL_IO_SGL) {
+               cb->ext_hdr = (struct mei_ext_hdr *)cb->buf.data;
+               cb->buf.data = NULL;
+               cb->buf.size = 0;
+       }
 
        rets = mei_cl_write(cl, cb);
 
+       if (mode & MEI_CL_IO_SGL && rets == 0)
+               rets = length;
+
 out:
        mutex_unlock(&bus->device_lock);
 
@@ -205,9 +215,16 @@ copy:
                goto free;
        }
 
-       r_length = min_t(size_t, length, cb->buf_idx);
-       memcpy(buf, cb->buf.data, r_length);
+       /* for the GSC type - copy the extended header to the buffer */
+       if (cb->ext_hdr && cb->ext_hdr->type == MEI_EXT_HDR_GSC) {
+               r_length = min_t(size_t, length, cb->ext_hdr->length * sizeof(u32));
+               memcpy(buf, cb->ext_hdr, r_length);
+       } else {
+               r_length = min_t(size_t, length, cb->buf_idx);
+               memcpy(buf, cb->buf.data, r_length);
+       }
        rets = r_length;
+
        if (vtag)
                *vtag = cb->vtag;
 
@@ -822,6 +839,131 @@ out:
 }
 EXPORT_SYMBOL_GPL(mei_cldev_disable);
 
+/**
+ * mei_cldev_send_gsc_command - sends a gsc command, by sending
+ * an sgl mei message to gsc and receiving a reply from gsc
+ *
+ * @cldev: me client device
+ * @client_id: client id to send the command to
+ * @fence_id: fence id to send the command to
+ * @sg_in: scatter gather list containing addresses for rx message buffer
+ * @total_in_len: total length of data in 'in' sg, can be less than the sum of buffers sizes
+ * @sg_out: scatter gather list containing addresses for tx message buffer
+ *
+ * Return:
+ *  * written size in bytes
+ *  * < 0 on error
+ */
+ssize_t mei_cldev_send_gsc_command(struct mei_cl_device *cldev,
+                                  u8 client_id, u32 fence_id,
+                                  struct scatterlist *sg_in,
+                                  size_t total_in_len,
+                                  struct scatterlist *sg_out)
+{
+       struct mei_cl *cl;
+       struct mei_device *bus;
+       ssize_t ret = 0;
+
+       struct mei_ext_hdr_gsc_h2f *ext_hdr;
+       size_t buf_sz = sizeof(struct mei_ext_hdr_gsc_h2f);
+       int sg_out_nents, sg_in_nents;
+       int i;
+       struct scatterlist *sg;
+       struct mei_ext_hdr_gsc_f2h rx_msg;
+       unsigned int sg_len;
+
+       if (!cldev || !sg_in || !sg_out)
+               return -EINVAL;
+
+       cl = cldev->cl;
+       bus = cldev->bus;
+
+       dev_dbg(bus->dev, "client_id %u, fence_id %u\n", client_id, fence_id);
+
+       if (!bus->hbm_f_gsc_supported)
+               return -EOPNOTSUPP;
+
+       sg_out_nents = sg_nents(sg_out);
+       sg_in_nents = sg_nents(sg_in);
+       /* at least one entry in tx and rx sgls must be present */
+       if (sg_out_nents <= 0 || sg_in_nents <= 0)
+               return -EINVAL;
+
+       buf_sz += (sg_out_nents + sg_in_nents) * sizeof(struct mei_gsc_sgl);
+       ext_hdr = kzalloc(buf_sz, GFP_KERNEL);
+       if (!ext_hdr)
+               return -ENOMEM;
+
+       /* construct the GSC message */
+       ext_hdr->hdr.type = MEI_EXT_HDR_GSC;
+       ext_hdr->hdr.length = buf_sz / sizeof(u32); /* length is in dw */
+
+       ext_hdr->client_id = client_id;
+       ext_hdr->addr_type = GSC_ADDRESS_TYPE_PHYSICAL_SGL;
+       ext_hdr->fence_id = fence_id;
+       ext_hdr->input_address_count = sg_in_nents;
+       ext_hdr->output_address_count = sg_out_nents;
+       ext_hdr->reserved[0] = 0;
+       ext_hdr->reserved[1] = 0;
+
+       /* copy in-sgl to the message */
+       for (i = 0, sg = sg_in; i < sg_in_nents; i++, sg++) {
+               ext_hdr->sgl[i].low = lower_32_bits(sg_dma_address(sg));
+               ext_hdr->sgl[i].high = upper_32_bits(sg_dma_address(sg));
+               sg_len = min_t(unsigned int, sg_dma_len(sg), PAGE_SIZE);
+               ext_hdr->sgl[i].length = (sg_len <= total_in_len) ? sg_len : total_in_len;
+               total_in_len -= ext_hdr->sgl[i].length;
+       }
+
+       /* copy out-sgl to the message */
+       for (i = sg_in_nents, sg = sg_out; i < sg_in_nents + sg_out_nents; i++, sg++) {
+               ext_hdr->sgl[i].low = lower_32_bits(sg_dma_address(sg));
+               ext_hdr->sgl[i].high = upper_32_bits(sg_dma_address(sg));
+               sg_len = min_t(unsigned int, sg_dma_len(sg), PAGE_SIZE);
+               ext_hdr->sgl[i].length = sg_len;
+       }
+
+       /* send the message to GSC */
+       ret = __mei_cl_send(cl, (u8 *)ext_hdr, buf_sz, 0, MEI_CL_IO_SGL);
+       if (ret < 0) {
+               dev_err(bus->dev, "__mei_cl_send failed, returned %zd\n", ret);
+               goto end;
+       }
+       if (ret != buf_sz) {
+               dev_err(bus->dev, "__mei_cl_send returned %zd instead of expected %zd\n",
+                       ret, buf_sz);
+               ret = -EIO;
+               goto end;
+       }
+
+       /* receive the reply from GSC, note that at this point sg_in should contain the reply */
+       ret = __mei_cl_recv(cl, (u8 *)&rx_msg, sizeof(rx_msg), NULL, MEI_CL_IO_SGL, 0);
+
+       if (ret != sizeof(rx_msg)) {
+               dev_err(bus->dev, "__mei_cl_recv returned %zd instead of expected %zd\n",
+                       ret, sizeof(rx_msg));
+               if (ret >= 0)
+                       ret = -EIO;
+               goto end;
+       }
+
+       /* check that rx_msg.client_id and rx_msg.fence_id match the ones we sent */
+       if (rx_msg.client_id != client_id || rx_msg.fence_id != fence_id) {
+               dev_err(bus->dev, "received client_id/fence_id  %u/%u  instead of %u/%u sent\n",
+                       rx_msg.client_id, rx_msg.fence_id, client_id, fence_id);
+               ret = -EFAULT;
+               goto end;
+       }
+
+       dev_dbg(bus->dev, "gsc command: successfully written %u bytes\n",  rx_msg.written);
+       ret = rx_msg.written;
+
+end:
+       kfree(ext_hdr);
+       return ret;
+}
+EXPORT_SYMBOL_GPL(mei_cldev_send_gsc_command);
+
 /**
  * mei_cl_device_find - find matching entry in the driver id table
  *
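
A hypothetical caller of the new mei_cldev_send_gsc_command() helper might look like the sketch below. The sg lists must already be DMA-mapped, since the helper reads sg_dma_address()/sg_dma_len() when filling the h2f header, and the client_id/fence_id values here are placeholders rather than real protocol values:

#include <linux/mei_cl_bus.h>
#include <linux/scatterlist.h>

static ssize_t demo_send_gsc(struct mei_cl_device *cldev,
                             struct scatterlist *sg_in, size_t in_len,
                             struct scatterlist *sg_out)
{
        ssize_t written;

        written = mei_cldev_send_gsc_command(cldev, 0 /* client_id */,
                                             0 /* fence_id */,
                                             sg_in, in_len, sg_out);
        if (written < 0)
                dev_err(&cldev->dev, "gsc command failed: %zd\n", written);

        return written;
}

On success the return value is rx_msg.written from the GSC reply; -EOPNOTSUPP indicates the firmware connection does not support the GSC extension.
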
index 0b2fbe1335a7747dc195db0da91dec35b7af1a20..6c8b71ae32c840eea15e48acc2c6c00a23256871 100644 (file)
@@ -322,6 +322,7 @@ void mei_io_cb_free(struct mei_cl_cb *cb)
 
        list_del(&cb->list);
        kfree(cb->buf.data);
+       kfree(cb->ext_hdr);
        kfree(cb);
 }
 
@@ -401,6 +402,7 @@ static struct mei_cl_cb *mei_io_cb_init(struct mei_cl *cl,
        cb->buf_idx = 0;
        cb->fop_type = type;
        cb->vtag = 0;
+       cb->ext_hdr = NULL;
 
        return cb;
 }
@@ -1740,6 +1742,17 @@ static inline u8 mei_ext_hdr_set_vtag(void *ext, u8 vtag)
        return vtag_hdr->hdr.length;
 }
 
+static inline bool mei_ext_hdr_is_gsc(struct mei_ext_hdr *ext)
+{
+       return ext && ext->type == MEI_EXT_HDR_GSC;
+}
+
+static inline u8 mei_ext_hdr_set_gsc(struct mei_ext_hdr *ext, struct mei_ext_hdr *gsc_hdr)
+{
+       memcpy(ext, gsc_hdr, mei_ext_hdr_len(gsc_hdr));
+       return ext->length;
+}
+
 /**
  * mei_msg_hdr_init - allocate and initialize mei message header
  *
@@ -1752,14 +1765,17 @@ static struct mei_msg_hdr *mei_msg_hdr_init(const struct mei_cl_cb *cb)
        size_t hdr_len;
        struct mei_ext_meta_hdr *meta;
        struct mei_msg_hdr *mei_hdr;
-       bool is_ext, is_vtag;
+       bool is_ext, is_hbm, is_gsc, is_vtag;
+       struct mei_ext_hdr *next_ext;
 
        if (!cb)
                return ERR_PTR(-EINVAL);
 
        /* Extended header for vtag is attached only on the first fragment */
        is_vtag = (cb->vtag && cb->buf_idx == 0);
-       is_ext = is_vtag;
+       is_hbm = cb->cl->me_cl->client_id == 0;
+       is_gsc = ((!is_hbm) && cb->cl->dev->hbm_f_gsc_supported && mei_ext_hdr_is_gsc(cb->ext_hdr));
+       is_ext = is_vtag || is_gsc;
 
        /* Compute extended header size */
        hdr_len = sizeof(*mei_hdr);
@@ -1771,6 +1787,9 @@ static struct mei_msg_hdr *mei_msg_hdr_init(const struct mei_cl_cb *cb)
        if (is_vtag)
                hdr_len += sizeof(struct mei_ext_hdr_vtag);
 
+       if (is_gsc)
+               hdr_len += mei_ext_hdr_len(cb->ext_hdr);
+
 setup_hdr:
        mei_hdr = kzalloc(hdr_len, GFP_KERNEL);
        if (!mei_hdr)
@@ -1785,10 +1804,20 @@ setup_hdr:
                goto out;
 
        meta = (struct mei_ext_meta_hdr *)mei_hdr->extension;
+       meta->size = 0;
+       next_ext = (struct mei_ext_hdr *)meta->hdrs;
        if (is_vtag) {
                meta->count++;
-               meta->size += mei_ext_hdr_set_vtag(meta->hdrs, cb->vtag);
+               meta->size += mei_ext_hdr_set_vtag(next_ext, cb->vtag);
+               next_ext = mei_ext_next(next_ext);
+       }
+
+       if (is_gsc) {
+               meta->count++;
+               meta->size += mei_ext_hdr_set_gsc(next_ext, cb->ext_hdr);
+               next_ext = mei_ext_next(next_ext);
        }
+
 out:
        mei_hdr->length = hdr_len - sizeof(*mei_hdr);
        return mei_hdr;
@@ -1812,14 +1841,14 @@ int mei_cl_irq_write(struct mei_cl *cl, struct mei_cl_cb *cb,
        struct mei_msg_hdr *mei_hdr = NULL;
        size_t hdr_len;
        size_t hbuf_len, dr_len;
-       size_t buf_len;
+       size_t buf_len = 0;
        size_t data_len;
        int hbuf_slots;
        u32 dr_slots;
        u32 dma_len;
        int rets;
        bool first_chunk;
-       const void *data;
+       const void *data = NULL;
 
        if (WARN_ON(!cl || !cl->dev))
                return -ENODEV;
@@ -1839,8 +1868,10 @@ int mei_cl_irq_write(struct mei_cl *cl, struct mei_cl_cb *cb,
                return 0;
        }
 
-       buf_len = buf->size - cb->buf_idx;
-       data = buf->data + cb->buf_idx;
+       if (buf->data) {
+               buf_len = buf->size - cb->buf_idx;
+               data = buf->data + cb->buf_idx;
+       }
        hbuf_slots = mei_hbuf_empty_slots(dev);
        if (hbuf_slots < 0) {
                rets = -EOVERFLOW;
@@ -1858,9 +1889,6 @@ int mei_cl_irq_write(struct mei_cl *cl, struct mei_cl_cb *cb,
                goto err;
        }
 
-       cl_dbg(dev, cl, "Extended Header %d vtag = %d\n",
-              mei_hdr->extended, cb->vtag);
-
        hdr_len = sizeof(*mei_hdr) + mei_hdr->length;
 
        /**
@@ -1889,7 +1917,7 @@ int mei_cl_irq_write(struct mei_cl *cl, struct mei_cl_cb *cb,
        }
        mei_hdr->length += data_len;
 
-       if (mei_hdr->dma_ring)
+       if (mei_hdr->dma_ring && buf->data)
                mei_dma_ring_write(dev, buf->data + cb->buf_idx, buf_len);
        rets = mei_write_message(dev, mei_hdr, hdr_len, data, data_len);
 
@@ -1983,9 +2011,6 @@ ssize_t mei_cl_write(struct mei_cl *cl, struct mei_cl_cb *cb)
                goto err;
        }
 
-       cl_dbg(dev, cl, "Extended Header %d vtag = %d\n",
-              mei_hdr->extended, cb->vtag);
-
        hdr_len = sizeof(*mei_hdr) + mei_hdr->length;
 
        if (rets == 0) {
@@ -2030,7 +2055,7 @@ ssize_t mei_cl_write(struct mei_cl *cl, struct mei_cl_cb *cb)
 
        mei_hdr->length += data_len;
 
-       if (mei_hdr->dma_ring)
+       if (mei_hdr->dma_ring && buf->data)
                mei_dma_ring_write(dev, buf->data, buf_len);
        rets = mei_write_message(dev, mei_hdr, hdr_len, data, data_len);
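
With the GSC extension folded in, the first-fragment TX header built by mei_msg_hdr_init() is the base header followed by the meta header and a TLV chain holding the vtag and GSC entries. Condensed into one expression, as a sketch against the driver's internal definitions rather than code from the patch:

/*
 * Illustration only: header length for a first fragment that carries both a
 * vtag and a GSC extended header, mirroring the hdr_len arithmetic above.
 */
static size_t demo_first_fragment_hdr_len(const struct mei_cl_cb *cb)
{
        return sizeof(struct mei_msg_hdr) +
               sizeof(struct mei_ext_meta_hdr) +
               sizeof(struct mei_ext_hdr_vtag) +
               mei_ext_hdr_len(cb->ext_hdr);    /* 0 when cb->ext_hdr is NULL */
}
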
 
index de712cbf5d07253ad0fd3ee1177f077e229fe096..12a62a911e4237aad8bc381b7b0f989725e6a42a 100644 (file)
@@ -340,9 +340,13 @@ static int mei_hbm_capabilities_req(struct mei_device *dev)
        req.hbm_cmd = MEI_HBM_CAPABILITIES_REQ_CMD;
        if (dev->hbm_f_vt_supported)
                req.capability_requested[0] |= HBM_CAP_VT;
+
        if (dev->hbm_f_cd_supported)
                req.capability_requested[0] |= HBM_CAP_CD;
 
+       if (dev->hbm_f_gsc_supported)
+               req.capability_requested[0] |= HBM_CAP_GSC;
+
        ret = mei_hbm_write_message(dev, &mei_hdr, &req);
        if (ret) {
                dev_err(dev->dev,
@@ -1200,6 +1204,12 @@ static void mei_hbm_config_features(struct mei_device *dev)
             dev->version.minor_version >= HBM_MINOR_VERSION_VT))
                dev->hbm_f_vt_supported = 1;
 
+       /* GSC support */
+       if (dev->version.major_version > HBM_MAJOR_VERSION_GSC ||
+           (dev->version.major_version == HBM_MAJOR_VERSION_GSC &&
+            dev->version.minor_version >= HBM_MINOR_VERSION_GSC))
+               dev->hbm_f_gsc_supported = 1;
+
        /* Capability message Support */
        dev->hbm_f_cap_supported = 0;
        if (dev->version.major_version > HBM_MAJOR_VERSION_CAP ||
@@ -1367,6 +1377,9 @@ int mei_hbm_dispatch(struct mei_device *dev, struct mei_msg_hdr *hdr)
                if (!(capability_res->capability_granted[0] & HBM_CAP_CD))
                        dev->hbm_f_cd_supported = 0;
 
+               if (!(capability_res->capability_granted[0] & HBM_CAP_GSC))
+                       dev->hbm_f_gsc_supported = 0;
+
                if (dev->hbm_f_dr_supported) {
                        if (mei_dmam_ring_alloc(dev))
                                dev_info(dev->dev, "running w/o dma ring\n");
index 9e2f781c6ed527b779ac5945d1c3c36a188caf65..da4ef0b51954ca27aedeb94866d7c4c6f6f6445f 100644 (file)
@@ -590,9 +590,14 @@ static int mei_me_hbuf_write(struct mei_device *dev,
        u32 dw_cnt;
        int empty_slots;
 
-       if (WARN_ON(!hdr || !data || hdr_len & 0x3))
+       if (WARN_ON(!hdr || hdr_len & 0x3))
                return -EINVAL;
 
+       if (!data && data_len) {
+               dev_err(dev->dev, "wrong parameters null data with data_len = %zu\n", data_len);
+               return -EINVAL;
+       }
+
        dev_dbg(dev->dev, MEI_HDR_FMT, MEI_HDR_PRM((struct mei_msg_hdr *)hdr));
 
        empty_slots = mei_hbuf_empty_slots(dev);
index e7e020dba6b1d54075f0d09436295e3abbe21203..319418ddf4fb00b3336f763ea09966bf9fdb8dee 100644 (file)
 #define HBM_MINOR_VERSION_VT               2
 #define HBM_MAJOR_VERSION_VT               2
 
+/*
+ * MEI version with GSC support
+ */
+#define HBM_MINOR_VERSION_GSC              2
+#define HBM_MAJOR_VERSION_GSC              2
+
 /*
  * MEI version with capabilities message support
  */
@@ -229,18 +235,19 @@ enum mei_cl_disconnect_status {
  *
  * @MEI_EXT_HDR_NONE: sentinel
  * @MEI_EXT_HDR_VTAG: vtag header
+ * @MEI_EXT_HDR_GSC: gsc header
  */
 enum mei_ext_hdr_type {
        MEI_EXT_HDR_NONE = 0,
        MEI_EXT_HDR_VTAG = 1,
+       MEI_EXT_HDR_GSC = 2,
 };
 
 /**
  * struct mei_ext_hdr - extend header descriptor (TLV)
  * @type: enum mei_ext_hdr_type
  * @length: length excluding descriptor
- * @ext_payload: payload of the specific extended header
- * @hdr: place holder for actual header
+ * @data: the extended header payload
  */
 struct mei_ext_hdr {
        u8 type;
@@ -279,12 +286,11 @@ struct mei_ext_hdr_vtag {
  * Extended header iterator functions
  */
 /**
- * mei_ext_hdr - extended header iterator begin
+ * mei_ext_begin - extended header iterator begin
  *
  * @meta: meta header of the extended header list
  *
- * Return:
- *     The first extended header
+ * Return: The first extended header
  */
 static inline struct mei_ext_hdr *mei_ext_begin(struct mei_ext_meta_hdr *meta)
 {
@@ -305,6 +311,60 @@ static inline bool mei_ext_last(struct mei_ext_meta_hdr *meta,
        return (u8 *)ext >= (u8 *)meta + sizeof(*meta) + (meta->size * 4);
 }
 
+struct mei_gsc_sgl {
+       u32 low;
+       u32 high;
+       u32 length;
+} __packed;
+
+#define GSC_HECI_MSG_KERNEL 0
+#define GSC_HECI_MSG_USER   1
+
+#define GSC_ADDRESS_TYPE_GTT   0
+#define GSC_ADDRESS_TYPE_PPGTT 1
+#define GSC_ADDRESS_TYPE_PHYSICAL_CONTINUOUS 2 /* max of 64K */
+#define GSC_ADDRESS_TYPE_PHYSICAL_SGL 3
+
+/**
+ * struct mei_ext_hdr_gsc_h2f - extended header: gsc host to firmware interface
+ *
+ * @hdr: extended header
+ * @client_id: GSC_HECI_MSG_KERNEL or GSC_HECI_MSG_USER
+ * @addr_type: GSC_ADDRESS_TYPE_{GTT, PPGTT, PHYSICAL_CONTINUOUS, PHYSICAL_SGL}
+ * @fence_id: synchronization marker
+ * @input_address_count: number of input sgl buffers
+ * @output_address_count: number of output sgl buffers
+ * @reserved: reserved
+ * @sgl: sg list
+ */
+struct mei_ext_hdr_gsc_h2f {
+       struct mei_ext_hdr hdr;
+       u8                 client_id;
+       u8                 addr_type;
+       u32                fence_id;
+       u8                 input_address_count;
+       u8                 output_address_count;
+       u8                 reserved[2];
+       struct mei_gsc_sgl sgl[];
+} __packed;
+
+/**
+ * struct mei_ext_hdr_gsc_f2h - gsc firmware to host interface
+ *
+ * @hdr: extended header
+ * @client_id: GSC_HECI_MSG_KERNEL or GSC_HECI_MSG_USER
+ * @reserved: reserved
+ * @fence_id: synchronization marker
+ * @written: number of bytes written to firmware
+ */
+struct mei_ext_hdr_gsc_f2h {
+       struct mei_ext_hdr hdr;
+       u8                 client_id;
+       u8                 reserved;
+       u32                fence_id;
+       u32                written;
+} __packed;
+
 /**
  * mei_ext_next - following extended header on the TLV list
  *
@@ -320,6 +380,21 @@ static inline struct mei_ext_hdr *mei_ext_next(struct mei_ext_hdr *ext)
        return (struct mei_ext_hdr *)((u8 *)ext + (ext->length * 4));
 }
 
+/**
+ * mei_ext_hdr_len - get ext header length in bytes
+ *
+ * @ext: extend header
+ *
+ * Return: extend header length in bytes
+ */
+static inline u32 mei_ext_hdr_len(const struct mei_ext_hdr *ext)
+{
+       if (!ext)
+               return 0;
+
+       return ext->length * sizeof(u32);
+}
+
 /**
  * struct mei_msg_hdr - MEI BUS Interface Section
  *
@@ -682,6 +757,10 @@ struct hbm_dma_ring_ctrl {
 
 /* virtual tag supported */
 #define HBM_CAP_VT BIT(0)
+
+/* gsc extended header support */
+#define HBM_CAP_GSC BIT(1)
+
 /* client dma supported */
 #define HBM_CAP_CD BIT(2)
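
The new structures keep the whole h2f header a multiple of 4 bytes, because mei_ext_hdr.length is a dword count. A small illustrative helper, mirroring the sizing arithmetic that bus.c uses when it builds the command (not part of the patch):

/* Illustration only: dword length of a gsc h2f header with the given sg counts. */
static u8 demo_gsc_h2f_dw_len(int sg_in_nents, int sg_out_nents)
{
        size_t buf_sz = sizeof(struct mei_ext_hdr_gsc_h2f) +
                        (sg_in_nents + sg_out_nents) * sizeof(struct mei_gsc_sgl);

        /* mei_ext_hdr_len() maps this back to bytes on the receive side */
        return buf_sz / sizeof(u32);
}
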
 
index 0706322154cbeff1c67b4623369a8328453574ad..0a0e984e56736cd0ece88f418171101c1addd9c4 100644 (file)
@@ -98,9 +98,12 @@ static int mei_cl_irq_read_msg(struct mei_cl *cl,
        struct mei_device *dev = cl->dev;
        struct mei_cl_cb *cb;
 
+       struct mei_ext_hdr_vtag *vtag_hdr = NULL;
+       struct mei_ext_hdr_gsc_f2h *gsc_f2h = NULL;
+
        size_t buf_sz;
        u32 length;
-       int ext_len;
+       u32 ext_len;
 
        length = mei_hdr->length;
        ext_len = 0;
@@ -122,18 +125,24 @@ static int mei_cl_irq_read_msg(struct mei_cl *cl,
        }
 
        if (mei_hdr->extended) {
-               struct mei_ext_hdr *ext;
-               struct mei_ext_hdr_vtag *vtag_hdr = NULL;
-
-               ext = mei_ext_begin(meta);
+               struct mei_ext_hdr *ext = mei_ext_begin(meta);
                do {
                        switch (ext->type) {
                        case MEI_EXT_HDR_VTAG:
                                vtag_hdr = (struct mei_ext_hdr_vtag *)ext;
                                break;
+                       case MEI_EXT_HDR_GSC:
+                               gsc_f2h = (struct mei_ext_hdr_gsc_f2h *)ext;
+                               cb->ext_hdr = kzalloc(sizeof(*gsc_f2h), GFP_KERNEL);
+                               if (!cb->ext_hdr) {
+                                       cb->status = -ENOMEM;
+                                       goto discard;
+                               }
+                               break;
                        case MEI_EXT_HDR_NONE:
                                fallthrough;
                        default:
+                               cl_err(dev, cl, "unknown extended header\n");
                                cb->status = -EPROTO;
                                break;
                        }
@@ -141,12 +150,14 @@ static int mei_cl_irq_read_msg(struct mei_cl *cl,
                        ext = mei_ext_next(ext);
                } while (!mei_ext_last(meta, ext));
 
-               if (!vtag_hdr) {
-                       cl_dbg(dev, cl, "vtag not found in extended header.\n");
+               if (!vtag_hdr && !gsc_f2h) {
+                       cl_dbg(dev, cl, "no vtag or gsc found in extended header.\n");
                        cb->status = -EPROTO;
                        goto discard;
                }
+       }
 
+       if (vtag_hdr) {
                cl_dbg(dev, cl, "vtag: %d\n", vtag_hdr->vtag);
                if (cb->vtag && cb->vtag != vtag_hdr->vtag) {
                        cl_err(dev, cl, "mismatched tag: %d != %d\n",
@@ -157,6 +168,28 @@ static int mei_cl_irq_read_msg(struct mei_cl *cl,
                cb->vtag = vtag_hdr->vtag;
        }
 
+       if (gsc_f2h) {
+               u32 ext_hdr_len = mei_ext_hdr_len(&gsc_f2h->hdr);
+
+               if (!dev->hbm_f_gsc_supported) {
+                       cl_err(dev, cl, "gsc extended header is not supported\n");
+                       cb->status = -EPROTO;
+                       goto discard;
+               }
+
+               if (length) {
+                       cl_err(dev, cl, "no data allowed in cb with gsc\n");
+                       cb->status = -EPROTO;
+                       goto discard;
+               }
+               if (ext_hdr_len > sizeof(*gsc_f2h)) {
+                       cl_err(dev, cl, "gsc extended header is too big %u\n", ext_hdr_len);
+                       cb->status = -EPROTO;
+                       goto discard;
+               }
+               memcpy(cb->ext_hdr, gsc_f2h, ext_hdr_len);
+       }
+
        if (!mei_cl_is_connected(cl)) {
                cl_dbg(dev, cl, "not connected\n");
                cb->status = -ENODEV;
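
The receive path above is the driver's usual TLV walk with a new MEI_EXT_HDR_GSC case; stripped of the bookkeeping it reduces to roughly the following loop, sketched with the existing iterator helpers from hw.h:

static void demo_walk_ext_hdrs(struct mei_ext_meta_hdr *meta)
{
        struct mei_ext_hdr *ext = mei_ext_begin(meta);

        do {
                switch (ext->type) {
                case MEI_EXT_HDR_VTAG:
                        /* remember the vtag header for the checks below */
                        break;
                case MEI_EXT_HDR_GSC:
                        /* allocate cb->ext_hdr and copy the f2h header into it */
                        break;
                default:
                        /* unknown type: the real code sets cb->status = -EPROTO */
                        break;
                }
                ext = mei_ext_next(ext);
        } while (!mei_ext_last(meta, ext));
}
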
index 6bb3e1ba9ded4979ea2eb710ec330718e2ccc36f..8d8018428d9dd8a254f0a2aff7ab036e69654c40 100644 (file)
@@ -116,12 +116,16 @@ enum mei_cb_file_ops {
  * @MEI_CL_IO_TX_INTERNAL: internal communication between driver and FW
  *
  * @MEI_CL_IO_RX_NONBLOCK: recv is non-blocking
+ *
+ * @MEI_CL_IO_SGL: send command with sgl list.
  */
 enum mei_cl_io_mode {
        MEI_CL_IO_TX_BLOCKING = BIT(0),
        MEI_CL_IO_TX_INTERNAL = BIT(1),
 
        MEI_CL_IO_RX_NONBLOCK = BIT(2),
+
+       MEI_CL_IO_SGL         = BIT(3),
 };
 
 /*
@@ -206,6 +210,7 @@ struct mei_cl;
  * @status: io status of the cb
  * @internal: communication between driver and FW flag
  * @blocking: transmission blocking mode
+ * @ext_hdr: extended header
  */
 struct mei_cl_cb {
        struct list_head list;
@@ -218,6 +223,7 @@ struct mei_cl_cb {
        int status;
        u32 internal:1;
        u32 blocking:1;
+       struct mei_ext_hdr *ext_hdr;
 };
 
 /**
@@ -494,6 +500,7 @@ struct mei_dev_timeouts {
  * @hbm_f_vt_supported  : hbm feature vtag supported
  * @hbm_f_cap_supported : hbm feature capabilities message supported
  * @hbm_f_cd_supported  : hbm feature client dma supported
+ * @hbm_f_gsc_supported : hbm feature gsc supported
  *
  * @fw_ver : FW versions
  *
@@ -585,6 +592,7 @@ struct mei_device {
        unsigned int hbm_f_vt_supported:1;
        unsigned int hbm_f_cap_supported:1;
        unsigned int hbm_f_cd_supported:1;
+       unsigned int hbm_f_gsc_supported:1;
 
        struct mei_fw_version fw_ver[MEI_MAX_FW_VER_BLOCKS];
 
index 5c39457e3f53d901f3d25b2fa491143f8545c8a0..8dd09b1722ebdd0c0c4ccb61c3e4cca5fafeda29 100644 (file)
@@ -77,10 +77,35 @@ mei_pxp_receive_message(struct device *dev, void *buffer, size_t size)
        return byte;
 }
 
+/**
+ * mei_pxp_gsc_command() - sends a gsc command, by sending
+ * an sgl mei message to gsc and receiving a reply from gsc
+ *
+ * @dev: device corresponding to the mei_cl_device
+ * @client_id: client id to send the command to
+ * @fence_id: fence id to send the command to
+ * @sg_in: scatter gather list containing addresses for rx message buffer
+ * @total_in_len: total length of data in 'in' sg, can be less than the sum of buffer sizes
+ * @sg_out: scatter gather list containing addresses for tx message buffer
+ *
+ * Return: bytes sent on Success, <0 on Failure
+ */
+static ssize_t mei_pxp_gsc_command(struct device *dev, u8 client_id, u32 fence_id,
+                                  struct scatterlist *sg_in, size_t total_in_len,
+                                  struct scatterlist *sg_out)
+{
+       struct mei_cl_device *cldev;
+
+       cldev = to_mei_cl_device(dev);
+
+       return mei_cldev_send_gsc_command(cldev, client_id, fence_id, sg_in, total_in_len, sg_out);
+}
+
 static const struct i915_pxp_component_ops mei_pxp_ops = {
        .owner = THIS_MODULE,
        .send = mei_pxp_send_message,
        .recv = mei_pxp_receive_message,
+       .gsc_command = mei_pxp_gsc_command,
 };
 
 static int mei_component_master_bind(struct device *dev)
@@ -131,17 +156,24 @@ static int mei_pxp_component_match(struct device *dev, int subcomponent,
 {
        struct device *base = data;
 
+       if (!dev)
+               return 0;
+
        if (!dev->driver || strcmp(dev->driver->name, "i915") ||
            subcomponent != I915_COMPONENT_PXP)
                return 0;
 
        base = base->parent;
-       if (!base)
+       if (!base) /* mei device */
                return 0;
 
-       base = base->parent;
-       dev = dev->parent;
+       base = base->parent; /* pci device */
+       /* for dgfx */
+       if (base && dev == base)
+               return 1;
 
+       /* for pch */
+       dev = dev->parent;
        return (base && dev && dev == base);
 }
 
index af593ec6446924a612962a062e1f79ad52956507..a702b6ec17f7ca95eda7d225ba1936ae1e15a298 100644 (file)
@@ -8,6 +8,7 @@
 
 #include <linux/mutex.h>
 #include <linux/device.h>
+struct scatterlist;
 
 /**
  * struct i915_pxp_component_ops - ops for PXP services.
@@ -23,6 +24,10 @@ struct i915_pxp_component_ops {
 
        int (*send)(struct device *dev, const void *message, size_t size);
        int (*recv)(struct device *dev, void *buffer, size_t size);
+       ssize_t (*gsc_command)(struct device *dev, u8 client_id, u32 fence_id,
+                              struct scatterlist *sg_in, size_t total_in_len,
+                              struct scatterlist *sg_out);
+
 };
 
 /**
index df1fab44ea5c119a5110ff6f9fba185d96194d84..fd6e0620658d6515b3e234cea861675fb5578712 100644 (file)
@@ -11,6 +11,7 @@
 
 struct mei_cl_device;
 struct mei_device;
+struct scatterlist;
 
 typedef void (*mei_cldev_cb_t)(struct mei_cl_device *cldev);
 
@@ -116,6 +117,11 @@ void mei_cldev_set_drvdata(struct mei_cl_device *cldev, void *data);
 int mei_cldev_enable(struct mei_cl_device *cldev);
 int mei_cldev_disable(struct mei_cl_device *cldev);
 bool mei_cldev_enabled(const struct mei_cl_device *cldev);
+ssize_t mei_cldev_send_gsc_command(struct mei_cl_device *cldev,
+                                  u8 client_id, u32 fence_id,
+                                  struct scatterlist *sg_in,
+                                  size_t total_in_len,
+                                  struct scatterlist *sg_out);
 
 void *mei_cldev_dma_map(struct mei_cl_device *cldev, u8 buffer_id, size_t size);
 int mei_cldev_dma_unmap(struct mei_cl_device *cldev);
index 520ad2691a99d166ffaff16c819b01bb1bda11f8..8df261c5ab9b1a222b8d2aa000266a8568c43580 100644 (file)
@@ -645,6 +645,22 @@ typedef struct drm_i915_irq_wait {
  */
 #define   I915_SCHEDULER_CAP_STATIC_PRIORITY_MAP       (1ul << 5)
 
+/*
+ * Query the status of HuC load.
+ *
+ * The query can fail in the following scenarios with the listed error codes:
+ *  -ENODEV if HuC is not present on this platform,
+ *  -EOPNOTSUPP if HuC firmware usage is disabled,
+ *  -ENOPKG if HuC firmware fetch failed,
+ *  -ENOEXEC if HuC firmware is invalid or mismatched,
+ *  -ENOMEM if i915 failed to prepare the FW objects for transfer to the uC,
+ *  -EIO if the FW transfer or the FW authentication failed.
+ *
+ * If the IOCTL is successful, the returned parameter will be set to one of the
+ * following values:
+ *  * 0 if HuC firmware load is not complete,
+ *  * 1 if HuC firmware is authenticated and running.
+ */
 #define I915_PARAM_HUC_STATUS           42
 
 /* Query whether DRM_I915_GEM_EXECBUFFER2 supports the ability to opt-out of
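
From userspace these error codes surface through the usual getparam ioctl. A minimal, illustrative query, assuming the libdrm include path for i915_drm.h:

#include <errno.h>
#include <sys/ioctl.h>
#include <i915_drm.h>

/* Returns 1 if HuC is authenticated and running, 0 if the load is not
 * complete yet, or a negative errno from the list above. */
static int demo_huc_status(int drm_fd)
{
        int value = 0;
        struct drm_i915_getparam gp = {
                .param = I915_PARAM_HUC_STATUS,
                .value = &value,
        };

        if (ioctl(drm_fd, DRM_IOCTL_I915_GETPARAM, &gp) < 0)
                return -errno;

        return value;
}
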
@@ -749,6 +765,12 @@ typedef struct drm_i915_irq_wait {
 /* Query if the kernel supports the I915_USERPTR_PROBE flag. */
 #define I915_PARAM_HAS_USERPTR_PROBE 56
 
+/*
+ * Frequency of the timestamps in OA reports. This used to be the same as the CS
+ * timestamp frequency, but differs on some platforms.
+ */
+#define I915_PARAM_OA_TIMESTAMP_FREQUENCY 57
+
 /* Must be kept compact -- no holes and well documented */
 
 /**
@@ -2650,6 +2672,10 @@ enum drm_i915_oa_format {
        I915_OA_FORMAT_A12_B8_C8,
        I915_OA_FORMAT_A32u40_A4u32_B8_C8,
 
+       /* DG2 */
+       I915_OAR_FORMAT_A32u40_A4u32_B8_C8,
+       I915_OA_FORMAT_A24u40_A14u32_B8_C8,
+
        I915_OA_FORMAT_MAX          /* non-ABI */
 };
 
@@ -3493,27 +3519,13 @@ struct drm_i915_gem_create_ext {
         *
         * The (page-aligned) allocated size for the object will be returned.
         *
-        * DG2 64K min page size implications:
-        *
-        * On discrete platforms, starting from DG2, we have to contend with GTT
-        * page size restrictions when dealing with I915_MEMORY_CLASS_DEVICE
-        * objects.  Specifically the hardware only supports 64K or larger GTT
-        * page sizes for such memory. The kernel will already ensure that all
-        * I915_MEMORY_CLASS_DEVICE memory is allocated using 64K or larger page
-        * sizes underneath.
-        *
-        * Note that the returned size here will always reflect any required
-        * rounding up done by the kernel, i.e 4K will now become 64K on devices
-        * such as DG2. The kernel will always select the largest minimum
-        * page-size for the set of possible placements as the value to use when
-        * rounding up the @size.
-        *
-        * Special DG2 GTT address alignment requirement:
-        *
-        * The GTT alignment will also need to be at least 2M for such objects.
+        * On platforms like DG2/ATS the kernel will always use 64K or larger
+        * pages for I915_MEMORY_CLASS_DEVICE. The kernel also requires a
+        * minimum of 64K GTT alignment for such objects.
         *
-        * Note that due to how the hardware implements 64K GTT page support, we
-        * have some further complications:
+        * NOTE: Previously the ABI here required a minimum GTT alignment of 2M
+        * on DG2/ATS, due to how the hardware implemented 64K GTT page support,
+        * where we had the following complications:
         *
         *   1) The entire PDE (which covers a 2MB virtual address range), must
         *   contain only 64K PTEs, i.e mixing 4K and 64K PTEs in the same
@@ -3522,12 +3534,10 @@ struct drm_i915_gem_create_ext {
         *   2) We still need to support 4K PTEs for I915_MEMORY_CLASS_SYSTEM
         *   objects.
         *
-        * To keep things simple for userland, we mandate that any GTT mappings
-        * must be aligned to and rounded up to 2MB. The kernel will internally
-        * pad them out to the next 2MB boundary. As this only wastes virtual
-        * address space and avoids userland having to copy any needlessly
-        * complicated PDE sharing scheme (coloring) and only affects DG2, this
-        * is deemed to be a good compromise.
+        * However on actual production HW this was completely changed to now
+        * allow setting a TLB hint at the PTE level (see PS64), which is a lot
+        * more flexible than the above. With this the 2M restriction was
+        * dropped; we now only require 64K.
         */
        __u64 size;