Merge tag 'drm-intel-gt-next-2023-12-08' of git://anongit.freedesktop.org/drm/drm...
author Dave Airlie <airlied@redhat.com>
Wed, 13 Dec 2023 01:20:49 +0000 (11:20 +1000)
committer Dave Airlie <airlied@redhat.com>
Wed, 13 Dec 2023 01:20:49 +0000 (11:20 +1000)
UAPI Changes:

-   drm/i915: Implement fdinfo memory stats printing

    Use the newly added drm_print_memory_stats helper to show memory
    utilisation of our objects in drm/driver specific fdinfo output.

    To collect the stats we walk the per-memory-region object lists
    and accumulate object size into the respective drm_memory_stats
    categories.

Cross-subsystem Changes:

- Backmerge of drm-next (to bring drm-intel-next for PXP changes)

Driver Changes:

- Wa_18028616096 now applies to all DG2 (Matt R)
- Drop Wa_22014600077 on all DG2 (Matt R)
- Add new ATS-M device ID (Haridhar)
- More Meteorlake (MTL) workarounds (Matt R, Dnyaneshwar, Jonathan,
  Gustavo, Radhakrishna)
- PMU WARN_ON cleanup on driver unbind (Umesh)
- Limit GGTT WC flushing workaround to pre BXT/ICL platforms
- Complement implementation for Wa_16018031267 / Wa_16018063123
  (Andrzej, Jonathan, Nirmoy, Chris)

- Properly print internal GSC engine in trace logs (Tvrtko)
- Track gt pm wakerefs (Andrzej)
- Fix null deref bugs on perf code when perf is disabled (Harshit,
  Tvrtko)
- Fix __i915_request_create memory leak on driver unbind (Andrzej)
- Remove spurious unsupported HuC message on MTL (Daniele)
- Read a shadowed mmio register for ggtt flush (Vinay)
- Add missing new-line to GT_TRACE (Andrzej)
- Add drm_dbgs for critical PXP events (Alan)
- Skip pxp init if gt is wedged (Zhanjun)

- Replace custom intel runtime_pm tracker with ref_tracker library
  (Andrzej)
- Compiler warning/static checker/coding style cleanups (Arnd, Nirmoy,
  Soumya, Gilbert, Dorcas, Kunwu, Sam, Tvrtko)
- Code structure and helper cleanups (Jani, Tvrtko, Andi)
- Selftest improvements (John, Tvrtko, Andrzej)

Signed-off-by: Dave Airlie <airlied@redhat.com>
# Conflicts:
# drivers/gpu/drm/i915/gt/intel_gt_mcr.c
From: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/ZXNBcsSwJEVsq9On@jlahtine-mobl.ger.corp.intel.com
1  2 
drivers/gpu/drm/i915/display/intel_display_power.c
drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
drivers/gpu/drm/i915/gt/intel_gt.h
drivers/gpu/drm/i915/i915_debugfs.c
drivers/gpu/drm/i915/i915_driver.c
drivers/gpu/drm/i915/i915_gpu_error.c
drivers/gpu/drm/i915/i915_gpu_error.h
drivers/gpu/drm/i915/intel_runtime_pm.c
drivers/gpu/drm/i915/intel_runtime_pm.h

index f23080a4368dd068ca58e76af721f93a7166d4ca,f78bfcf2ce000880a3ab53de0825052caeb32dad..5f091502719b956a7fc3037ecc473b81ba3455d9
@@@ -405,7 -405,7 +405,7 @@@ print_async_put_domains_state(struct i9
                                                     struct drm_i915_private,
                                                     display.power.domains);
  
-       drm_dbg(&i915->drm, "async_put_wakeref %u\n",
+       drm_dbg(&i915->drm, "async_put_wakeref %lu\n",
                power_domains->async_put_wakeref);
  
        print_power_domains(power_domains, "async_put_domains[0]",
@@@ -967,7 -967,7 +967,7 @@@ static u32 get_allowed_dc_mask(const st
                DISPLAY_VER(dev_priv) >= 11 ?
               DC_STATE_EN_DC9 : 0;
  
 -      if (!dev_priv->params.disable_power_well)
 +      if (!dev_priv->display.params.disable_power_well)
                max_dc = 0;
  
        if (enable_dc >= 0 && enable_dc <= max_dc) {
@@@ -1016,11 -1016,11 +1016,11 @@@ int intel_power_domains_init(struct drm
  {
        struct i915_power_domains *power_domains = &dev_priv->display.power.domains;
  
 -      dev_priv->params.disable_power_well =
 +      dev_priv->display.params.disable_power_well =
                sanitize_disable_power_well_option(dev_priv,
 -                                                 dev_priv->params.disable_power_well);
 +                                                 dev_priv->display.params.disable_power_well);
        power_domains->allowed_dc_mask =
 -              get_allowed_dc_mask(dev_priv, dev_priv->params.enable_dc);
 +              get_allowed_dc_mask(dev_priv, dev_priv->display.params.enable_dc);
  
        power_domains->target_dc_state =
                sanitize_target_dc_state(dev_priv, DC_STATE_EN_UPTO_DC6);
@@@ -1697,14 -1697,14 +1697,14 @@@ static void icl_display_core_init(struc
        if (resume)
                intel_dmc_load_program(dev_priv);
  
 -      /* Wa_14011508470:tgl,dg1,rkl,adl-s,adl-p */
 -      if (DISPLAY_VER(dev_priv) >= 12)
 +      /* Wa_14011508470:tgl,dg1,rkl,adl-s,adl-p,dg2 */
 +      if (IS_DISPLAY_IP_RANGE(dev_priv, IP_VER(12, 0), IP_VER(13, 0)))
                intel_de_rmw(dev_priv, GEN11_CHICKEN_DCPR_2, 0,
                             DCPR_CLEAR_MEMSTAT_DIS | DCPR_SEND_RESP_IMM |
                             DCPR_MASK_LPMODE | DCPR_MASK_MAXLATENCY_MEMUP_CLR);
  
        /* Wa_14011503030:xelpd */
 -      if (DISPLAY_VER(dev_priv) >= 13)
 +      if (DISPLAY_VER(dev_priv) == 13)
                intel_de_write(dev_priv, XELPD_DISPLAY_ERR_FATAL_MASK, ~0);
  }
  
@@@ -1950,7 -1950,7 +1950,7 @@@ void intel_power_domains_init_hw(struc
                intel_display_power_get(i915, POWER_DOMAIN_INIT);
  
        /* Disable power support if the user asked so. */
 -      if (!i915->params.disable_power_well) {
 +      if (!i915->display.params.disable_power_well) {
                drm_WARN_ON(&i915->drm, power_domains->disable_wakeref);
                i915->display.power.domains.disable_wakeref = intel_display_power_get(i915,
                                                                                      POWER_DOMAIN_INIT);
@@@ -1977,7 -1977,7 +1977,7 @@@ void intel_power_domains_driver_remove(
                fetch_and_zero(&i915->display.power.domains.init_wakeref);
  
        /* Remove the refcount we took to keep power well support disabled. */
 -      if (!i915->params.disable_power_well)
 +      if (!i915->display.params.disable_power_well)
                intel_display_power_put(i915, POWER_DOMAIN_INIT,
                                        fetch_and_zero(&i915->display.power.domains.disable_wakeref));
  
@@@ -2096,7 -2096,7 +2096,7 @@@ void intel_power_domains_suspend(struc
         * Even if power well support was disabled we still want to disable
         * power wells if power domains must be deinitialized for suspend.
         */
 -      if (!i915->params.disable_power_well)
 +      if (!i915->display.params.disable_power_well)
                intel_display_power_put(i915, POWER_DOMAIN_INIT,
                                        fetch_and_zero(&i915->display.power.domains.disable_wakeref));
  
index ccc077b74d2de109fbc56db1580abf672d3ada65,b1aa62dfb155d58a46f4bcde2594e28b327a14c8..81a57dd52dfda48d31506c7295dbb1ad1868ea57
@@@ -9,7 -9,6 +9,7 @@@
  #include <linux/sync_file.h>
  #include <linux/uaccess.h>
  
 +#include <drm/drm_auth.h>
  #include <drm/drm_syncobj.h>
  
  #include "display/intel_frontbuffer.h"
@@@ -254,6 -253,8 +254,8 @@@ struct i915_execbuffer 
        struct intel_gt *gt; /* gt for the execbuf */
        struct intel_context *context; /* logical state for the request */
        struct i915_gem_context *gem_context; /** caller's context */
+       intel_wakeref_t wakeref;
+       intel_wakeref_t wakeref_gt0;
  
        /** our requests to build */
        struct i915_request *requests[MAX_ENGINE_INSTANCE + 1];
@@@ -1679,7 -1680,7 +1681,7 @@@ static int eb_copy_relocations(const st
                urelocs = u64_to_user_ptr(eb->exec[i].relocs_ptr);
                size = nreloc * sizeof(*relocs);
  
-               relocs = kvmalloc_array(size, 1, GFP_KERNEL);
+               relocs = kvmalloc_array(1, size, GFP_KERNEL);
                if (!relocs) {
                        err = -ENOMEM;
                        goto err;
@@@ -2720,13 -2721,13 +2722,13 @@@ eb_select_engine(struct i915_execbuffe
  
        for_each_child(ce, child)
                intel_context_get(child);
-       intel_gt_pm_get(gt);
+       eb->wakeref = intel_gt_pm_get(ce->engine->gt);
        /*
         * Keep GT0 active on MTL so that i915_vma_parked() doesn't
         * free VMAs while execbuf ioctl is validating VMAs.
         */
        if (gt->info.id)
-               intel_gt_pm_get(to_gt(gt->i915));
+               eb->wakeref_gt0 = intel_gt_pm_get(to_gt(gt->i915));
  
        if (!test_bit(CONTEXT_ALLOC_BIT, &ce->flags)) {
                err = intel_context_alloc_state(ce);
  
  err:
        if (gt->info.id)
-               intel_gt_pm_put(to_gt(gt->i915));
+               intel_gt_pm_put(to_gt(gt->i915), eb->wakeref_gt0);
  
-       intel_gt_pm_put(gt);
+       intel_gt_pm_put(ce->engine->gt, eb->wakeref);
        for_each_child(ce, child)
                intel_context_put(child);
        intel_context_put(ce);
@@@ -2786,8 -2787,8 +2788,8 @@@ eb_put_engine(struct i915_execbuffer *e
         * i915_vma_parked() from interfering while execbuf validates vmas.
         */
        if (eb->gt->info.id)
-               intel_gt_pm_put(to_gt(eb->gt->i915));
-       intel_gt_pm_put(eb->gt);
+               intel_gt_pm_put(to_gt(eb->gt->i915), eb->wakeref_gt0);
+       intel_gt_pm_put(eb->context->engine->gt, eb->wakeref);
        for_each_child(eb->context, child)
                intel_context_put(child);
        intel_context_put(eb->context);
index e1f13735f530edad3e72e2dfd8b0d5b07f2a641a,2f81c1c792382a9b78c9b7275180bb85944dcef8..608f5c87292857c6b2777bbd809c5bd87a48238c
@@@ -82,6 -82,10 +82,10 @@@ struct drm_printer
                  ##__VA_ARGS__);                                       \
  } while (0)
  
+ #define NEEDS_FASTCOLOR_BLT_WABB(engine) ( \
+       IS_GFX_GT_IP_RANGE(engine->gt, IP_VER(12, 55), IP_VER(12, 71)) && \
+       engine->class == COPY_ENGINE_CLASS && engine->instance == 0)
  static inline bool gt_is_root(struct intel_gt *gt)
  {
        return !gt->info.id;
@@@ -114,6 -118,11 +118,11 @@@ static inline struct intel_gt *gsc_to_g
        return container_of(gsc, struct intel_gt, gsc);
  }
  
+ static inline struct drm_i915_private *guc_to_i915(struct intel_guc *guc)
+ {
+       return guc_to_gt(guc)->i915;
+ }
  void intel_gt_common_init_early(struct intel_gt *gt);
  int intel_root_gt_init_early(struct drm_i915_private *i915);
  int intel_gt_assign_ggtt(struct intel_gt *gt);
@@@ -167,20 -176,6 +176,20 @@@ void intel_gt_release_all(struct drm_i9
             (id__)++) \
                for_each_if(((gt__) = (i915__)->gt[(id__)]))
  
 +/* Simple iterator over all initialised engines */
 +#define for_each_engine(engine__, gt__, id__) \
 +      for ((id__) = 0; \
 +           (id__) < I915_NUM_ENGINES; \
 +           (id__)++) \
 +              for_each_if ((engine__) = (gt__)->engine[(id__)])
 +
 +/* Iterator over subset of engines selected by mask */
 +#define for_each_engine_masked(engine__, gt__, mask__, tmp__) \
 +      for ((tmp__) = (mask__) & (gt__)->info.engine_mask; \
 +           (tmp__) ? \
 +           ((engine__) = (gt__)->engine[__mask_next_bit(tmp__)]), 1 : \
 +           0;)
 +
  void intel_gt_info_print(const struct intel_gt_info *info,
                         struct drm_printer *p);
  
index bfe92d2402ea21f81b53df2adf504e5a4c436f9a,beffac46a5e29bba43c4e83ddcb03295b80bbecf..db99c2ef66db84726129c9fba1926730ad753472
@@@ -32,8 -32,6 +32,8 @@@
  
  #include <drm/drm_debugfs.h>
  
 +#include "display/intel_display_params.h"
 +
  #include "gem/i915_gem_context.h"
  #include "gt/intel_gt.h"
  #include "gt/intel_gt_buffer_pool.h"
@@@ -51,6 -49,7 +51,7 @@@
  #include "i915_debugfs.h"
  #include "i915_debugfs_params.h"
  #include "i915_driver.h"
+ #include "i915_gpu_error.h"
  #include "i915_irq.h"
  #include "i915_reg.h"
  #include "i915_scheduler.h"
@@@ -69,13 -68,13 +70,13 @@@ static int i915_capabilities(struct seq
        seq_printf(m, "pch: %d\n", INTEL_PCH_TYPE(i915));
  
        intel_device_info_print(INTEL_INFO(i915), RUNTIME_INFO(i915), &p);
 -      intel_display_device_info_print(DISPLAY_INFO(i915), DISPLAY_RUNTIME_INFO(i915), &p);
        i915_print_iommu_status(i915, &p);
        intel_gt_info_print(&to_gt(i915)->info, &p);
        intel_driver_caps_print(&i915->caps, &p);
  
        kernel_param_lock(THIS_MODULE);
        i915_params_dump(&i915->params, &p);
 +      intel_display_params_dump(i915, &p);
        kernel_param_unlock(THIS_MODULE);
  
        return 0;
@@@ -299,107 -298,6 +300,6 @@@ static int i915_gem_object_info(struct 
        return 0;
  }
  
- #if IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR)
- static ssize_t gpu_state_read(struct file *file, char __user *ubuf,
-                             size_t count, loff_t *pos)
- {
-       struct i915_gpu_coredump *error;
-       ssize_t ret;
-       void *buf;
-       error = file->private_data;
-       if (!error)
-               return 0;
-       /* Bounce buffer required because of kernfs __user API convenience. */
-       buf = kmalloc(count, GFP_KERNEL);
-       if (!buf)
-               return -ENOMEM;
-       ret = i915_gpu_coredump_copy_to_buffer(error, buf, *pos, count);
-       if (ret <= 0)
-               goto out;
-       if (!copy_to_user(ubuf, buf, ret))
-               *pos += ret;
-       else
-               ret = -EFAULT;
- out:
-       kfree(buf);
-       return ret;
- }
- static int gpu_state_release(struct inode *inode, struct file *file)
- {
-       i915_gpu_coredump_put(file->private_data);
-       return 0;
- }
- static int i915_gpu_info_open(struct inode *inode, struct file *file)
- {
-       struct drm_i915_private *i915 = inode->i_private;
-       struct i915_gpu_coredump *gpu;
-       intel_wakeref_t wakeref;
-       gpu = NULL;
-       with_intel_runtime_pm(&i915->runtime_pm, wakeref)
-               gpu = i915_gpu_coredump(to_gt(i915), ALL_ENGINES, CORE_DUMP_FLAG_NONE);
-       if (IS_ERR(gpu))
-               return PTR_ERR(gpu);
-       file->private_data = gpu;
-       return 0;
- }
- static const struct file_operations i915_gpu_info_fops = {
-       .owner = THIS_MODULE,
-       .open = i915_gpu_info_open,
-       .read = gpu_state_read,
-       .llseek = default_llseek,
-       .release = gpu_state_release,
- };
- static ssize_t
- i915_error_state_write(struct file *filp,
-                      const char __user *ubuf,
-                      size_t cnt,
-                      loff_t *ppos)
- {
-       struct i915_gpu_coredump *error = filp->private_data;
-       if (!error)
-               return 0;
-       drm_dbg(&error->i915->drm, "Resetting error state\n");
-       i915_reset_error_state(error->i915);
-       return cnt;
- }
- static int i915_error_state_open(struct inode *inode, struct file *file)
- {
-       struct i915_gpu_coredump *error;
-       error = i915_first_error_state(inode->i_private);
-       if (IS_ERR(error))
-               return PTR_ERR(error);
-       file->private_data  = error;
-       return 0;
- }
- static const struct file_operations i915_error_state_fops = {
-       .owner = THIS_MODULE,
-       .open = i915_error_state_open,
-       .read = gpu_state_read,
-       .write = i915_error_state_write,
-       .llseek = default_llseek,
-       .release = gpu_state_release,
- };
- #endif
  static int i915_frequency_info(struct seq_file *m, void *unused)
  {
        struct drm_i915_private *i915 = node_to_i915(m->private);
@@@ -839,10 -737,6 +739,6 @@@ static const struct i915_debugfs_files 
        {"i915_perf_noa_delay", &i915_perf_noa_delay_fops},
        {"i915_wedged", &i915_wedged_fops},
        {"i915_gem_drop_caches", &i915_drop_caches_fops},
- #if IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR)
-       {"i915_error_state", &i915_error_state_fops},
-       {"i915_gpu_info", &i915_gpu_info_fops},
- #endif
  };
  
  void i915_debugfs_register(struct drm_i915_private *dev_priv)
        drm_debugfs_create_files(i915_debugfs_list,
                                 ARRAY_SIZE(i915_debugfs_list),
                                 minor->debugfs_root, minor);
+       i915_gpu_error_debugfs_register(dev_priv);
  }
index 2a1faf4039659c298109df078a25e07274377336,8a17eb7f93214b21d6ee5ee83494f13f4eb5e657..c7d7c3b7ecc638f433b6b018c8b9562bf09eb0d6
@@@ -231,10 -231,16 +231,10 @@@ static int i915_driver_early_probe(stru
  
        spin_lock_init(&dev_priv->irq_lock);
        spin_lock_init(&dev_priv->gpu_error.lock);
 -      mutex_init(&dev_priv->display.backlight.lock);
  
        mutex_init(&dev_priv->sb_lock);
        cpu_latency_qos_add_request(&dev_priv->sb_qos, PM_QOS_DEFAULT_VALUE);
  
 -      mutex_init(&dev_priv->display.audio.mutex);
 -      mutex_init(&dev_priv->display.wm.wm_mutex);
 -      mutex_init(&dev_priv->display.pps.mutex);
 -      mutex_init(&dev_priv->display.hdcp.hdcp_mutex);
 -
        i915_memcpy_init_early(dev_priv);
        intel_runtime_pm_init_early(&dev_priv->runtime_pm);
  
@@@ -798,7 -804,9 +798,9 @@@ int i915_driver_probe(struct pci_dev *p
        if (ret)
                goto out_cleanup_modeset2;
  
-       intel_pxp_init(i915);
+       ret = intel_pxp_init(i915);
+       if (ret != -ENODEV)
+               drm_dbg(&i915->drm, "pxp init failed with %d\n", ret);
  
        ret = intel_display_driver_probe(i915);
        if (ret)
@@@ -901,8 -909,6 +903,8 @@@ static void i915_driver_release(struct 
        intel_runtime_pm_driver_release(rpm);
  
        i915_driver_late_release(dev_priv);
 +
 +      intel_display_device_remove(dev_priv);
  }
  
  static int i915_driver_open(struct drm_device *dev, struct drm_file *file)
@@@ -1033,7 -1039,7 +1035,7 @@@ void i915_driver_shutdown(struct drm_i9
        intel_power_domains_driver_remove(i915);
        enable_rpm_wakeref_asserts(&i915->runtime_pm);
  
-       intel_runtime_pm_driver_release(&i915->runtime_pm);
+       intel_runtime_pm_driver_last_release(&i915->runtime_pm);
  }
  
  static bool suspend_to_idle(struct drm_i915_private *dev_priv)
index 0971f497632415dcff2d85746ad9536877c6f009,f9e750217f1834d49b67678f57c4bc878df413b7..d04660b60046370d436428e996e9634bb5e722cb
@@@ -57,6 -57,7 +57,7 @@@
  #include "i915_memcpy.h"
  #include "i915_reg.h"
  #include "i915_scatterlist.h"
+ #include "i915_sysfs.h"
  #include "i915_utils.h"
  
  #define ALLOW_FAIL (__GFP_KSWAPD_RECLAIM | __GFP_RETRY_MAYFAIL | __GFP_NOWARN)
@@@ -520,7 -521,7 +521,7 @@@ __find_vma(struct i915_vma_coredump *vm
        return NULL;
  }
  
- struct i915_vma_coredump *
+ static struct i915_vma_coredump *
  intel_gpu_error_find_batch(const struct intel_engine_coredump *ee)
  {
        return __find_vma(ee->vma, "batch");
@@@ -609,9 -610,9 +610,9 @@@ void i915_error_printf(struct drm_i915_
        va_end(args);
  }
  
- void intel_gpu_error_print_vma(struct drm_i915_error_state_buf *m,
-                              const struct intel_engine_cs *engine,
-                              const struct i915_vma_coredump *vma)
+ static void intel_gpu_error_print_vma(struct drm_i915_error_state_buf *m,
+                                     const struct intel_engine_cs *engine,
+                                     const struct i915_vma_coredump *vma)
  {
        char out[ASCII85_BUFSZ];
        struct page *page;
@@@ -660,7 -661,6 +661,7 @@@ static void err_print_params(struct drm
        struct drm_printer p = i915_error_printer(m);
  
        i915_params_dump(params, &p);
 +      intel_display_params_dump(m->i915, &p);
  }
  
  static void err_print_pciid(struct drm_i915_error_state_buf *m,
@@@ -1028,7 -1028,6 +1029,7 @@@ static void i915_vma_coredump_free(stru
  static void cleanup_params(struct i915_gpu_coredump *error)
  {
        i915_params_free(&error->params);
 +      intel_display_params_free(&error->display_params);
  }
  
  static void cleanup_uc(struct intel_uc_coredump *uc)
@@@ -1990,7 -1989,6 +1991,7 @@@ static void capture_gen(struct i915_gpu
        error->suspend_count = i915->suspend_count;
  
        i915_params_copy(&error->params, &i915->params);
 +      intel_display_params_copy(&error->display_params);
        memcpy(&error->device_info,
               INTEL_INFO(i915),
               sizeof(error->device_info));
@@@ -2140,7 -2138,7 +2141,7 @@@ __i915_gpu_coredump(struct intel_gt *gt
        return error;
  }
  
- struct i915_gpu_coredump *
+ static struct i915_gpu_coredump *
  i915_gpu_coredump(struct intel_gt *gt, intel_engine_mask_t engine_mask, u32 dump_flags)
  {
        static DEFINE_MUTEX(capture_mutex);
@@@ -2177,7 -2175,7 +2178,7 @@@ void i915_error_state_store(struct i915
            ktime_get_real_seconds() - DRIVER_TIMESTAMP < DAY_AS_SECONDS(180)) {
                pr_info("GPU hangs can indicate a bug anywhere in the entire gfx stack, including userspace.\n");
                pr_info("Please file a _new_ bug report at https://gitlab.freedesktop.org/drm/intel/issues/new.\n");
 -              pr_info("Please see https://gitlab.freedesktop.org/drm/intel/-/wikis/How-to-file-i915-bugs for details.\n");
 +              pr_info("Please see https://drm.pages.freedesktop.org/intel-docs/how-to-file-i915-bugs.html for details.\n");
                pr_info("drm/i915 developers can then reassign to the right component if it's not a kernel issue.\n");
                pr_info("The GPU crash dump is required to analyze GPU hangs, so please always attach it.\n");
                pr_info("GPU crash dump saved to /sys/class/drm/card%d/error\n",
@@@ -2211,7 -2209,7 +2212,7 @@@ void i915_capture_error_state(struct in
        i915_gpu_coredump_put(error);
  }
  
- struct i915_gpu_coredump *
+ static struct i915_gpu_coredump *
  i915_first_error_state(struct drm_i915_private *i915)
  {
        struct i915_gpu_coredump *error;
@@@ -2378,3 -2376,184 +2379,184 @@@ void intel_klog_error_capture(struct in
        drm_info(&i915->drm, "[Capture/%d.%d] Dumped %zd bytes\n", l_count, line++, pos_err);
  }
  #endif
+ static ssize_t gpu_state_read(struct file *file, char __user *ubuf,
+                             size_t count, loff_t *pos)
+ {
+       struct i915_gpu_coredump *error;
+       ssize_t ret;
+       void *buf;
+       error = file->private_data;
+       if (!error)
+               return 0;
+       /* Bounce buffer required because of kernfs __user API convenience. */
+       buf = kmalloc(count, GFP_KERNEL);
+       if (!buf)
+               return -ENOMEM;
+       ret = i915_gpu_coredump_copy_to_buffer(error, buf, *pos, count);
+       if (ret <= 0)
+               goto out;
+       if (!copy_to_user(ubuf, buf, ret))
+               *pos += ret;
+       else
+               ret = -EFAULT;
+ out:
+       kfree(buf);
+       return ret;
+ }
+ static int gpu_state_release(struct inode *inode, struct file *file)
+ {
+       i915_gpu_coredump_put(file->private_data);
+       return 0;
+ }
+ static int i915_gpu_info_open(struct inode *inode, struct file *file)
+ {
+       struct drm_i915_private *i915 = inode->i_private;
+       struct i915_gpu_coredump *gpu;
+       intel_wakeref_t wakeref;
+       gpu = NULL;
+       with_intel_runtime_pm(&i915->runtime_pm, wakeref)
+               gpu = i915_gpu_coredump(to_gt(i915), ALL_ENGINES, CORE_DUMP_FLAG_NONE);
+       if (IS_ERR(gpu))
+               return PTR_ERR(gpu);
+       file->private_data = gpu;
+       return 0;
+ }
+ static const struct file_operations i915_gpu_info_fops = {
+       .owner = THIS_MODULE,
+       .open = i915_gpu_info_open,
+       .read = gpu_state_read,
+       .llseek = default_llseek,
+       .release = gpu_state_release,
+ };
+ static ssize_t
+ i915_error_state_write(struct file *filp,
+                      const char __user *ubuf,
+                      size_t cnt,
+                      loff_t *ppos)
+ {
+       struct i915_gpu_coredump *error = filp->private_data;
+       if (!error)
+               return 0;
+       drm_dbg(&error->i915->drm, "Resetting error state\n");
+       i915_reset_error_state(error->i915);
+       return cnt;
+ }
+ static int i915_error_state_open(struct inode *inode, struct file *file)
+ {
+       struct i915_gpu_coredump *error;
+       error = i915_first_error_state(inode->i_private);
+       if (IS_ERR(error))
+               return PTR_ERR(error);
+       file->private_data  = error;
+       return 0;
+ }
+ static const struct file_operations i915_error_state_fops = {
+       .owner = THIS_MODULE,
+       .open = i915_error_state_open,
+       .read = gpu_state_read,
+       .write = i915_error_state_write,
+       .llseek = default_llseek,
+       .release = gpu_state_release,
+ };
+ void i915_gpu_error_debugfs_register(struct drm_i915_private *i915)
+ {
+       struct drm_minor *minor = i915->drm.primary;
+       debugfs_create_file("i915_error_state", 0644, minor->debugfs_root, i915,
+                           &i915_error_state_fops);
+       debugfs_create_file("i915_gpu_info", 0644, minor->debugfs_root, i915,
+                           &i915_gpu_info_fops);
+ }
+ static ssize_t error_state_read(struct file *filp, struct kobject *kobj,
+                               struct bin_attribute *attr, char *buf,
+                               loff_t off, size_t count)
+ {
+       struct device *kdev = kobj_to_dev(kobj);
+       struct drm_i915_private *i915 = kdev_minor_to_i915(kdev);
+       struct i915_gpu_coredump *gpu;
+       ssize_t ret = 0;
+       /*
+        * FIXME: Concurrent clients triggering resets and reading + clearing
+        * dumps can cause inconsistent sysfs reads when a user calls in with a
+        * non-zero offset to complete a prior partial read but the
+        * gpu_coredump has been cleared or replaced.
+        */
+       gpu = i915_first_error_state(i915);
+       if (IS_ERR(gpu)) {
+               ret = PTR_ERR(gpu);
+       } else if (gpu) {
+               ret = i915_gpu_coredump_copy_to_buffer(gpu, buf, off, count);
+               i915_gpu_coredump_put(gpu);
+       } else {
+               const char *str = "No error state collected\n";
+               size_t len = strlen(str);
+               if (off < len) {
+                       ret = min_t(size_t, count, len - off);
+                       memcpy(buf, str + off, ret);
+               }
+       }
+       return ret;
+ }
+ static ssize_t error_state_write(struct file *file, struct kobject *kobj,
+                                struct bin_attribute *attr, char *buf,
+                                loff_t off, size_t count)
+ {
+       struct device *kdev = kobj_to_dev(kobj);
+       struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev);
+       drm_dbg(&dev_priv->drm, "Resetting error state\n");
+       i915_reset_error_state(dev_priv);
+       return count;
+ }
+ static const struct bin_attribute error_state_attr = {
+       .attr.name = "error",
+       .attr.mode = S_IRUSR | S_IWUSR,
+       .size = 0,
+       .read = error_state_read,
+       .write = error_state_write,
+ };
+ void i915_gpu_error_sysfs_setup(struct drm_i915_private *i915)
+ {
+       struct device *kdev = i915->drm.primary->kdev;
+       if (sysfs_create_bin_file(&kdev->kobj, &error_state_attr))
+               drm_err(&i915->drm, "error_state sysfs setup failed\n");
+ }
+ void i915_gpu_error_sysfs_teardown(struct drm_i915_private *i915)
+ {
+       struct device *kdev = i915->drm.primary->kdev;
+       sysfs_remove_bin_file(&kdev->kobj, &error_state_attr);
+ }
index 4ce227f7e1e125290b9ef22a0943c034b4ef565a,b75052e69a2b76db5aea12f96f817eb37adcafeb..7c255bb1c3190d3294950bf07f77beacc59caaff
@@@ -15,8 -15,8 +15,9 @@@
  #include <drm/drm_mm.h>
  
  #include "display/intel_display_device.h"
 +#include "display/intel_display_params.h"
  #include "gt/intel_engine.h"
+ #include "gt/intel_engine_types.h"
  #include "gt/intel_gt_types.h"
  #include "gt/uc/intel_uc_fw.h"
  
@@@ -215,7 -215,6 +216,7 @@@ struct i915_gpu_coredump 
        struct intel_display_runtime_info display_runtime_info;
        struct intel_driver_caps driver_caps;
        struct i915_params params;
 +      struct intel_display_params display_params;
  
        struct intel_overlay_error_state *overlay;
  
@@@ -234,7 -233,7 +235,7 @@@ struct i915_gpu_error 
        atomic_t reset_count;
  
        /** Number of times an engine has been reset */
-       atomic_t reset_engine_count[I915_NUM_ENGINES];
+       atomic_t reset_engine_count[MAX_ENGINE_CLASS];
  };
  
  struct drm_i915_error_state_buf {
@@@ -257,7 -256,14 +258,14 @@@ static inline u32 i915_reset_count(stru
  static inline u32 i915_reset_engine_count(struct i915_gpu_error *error,
                                          const struct intel_engine_cs *engine)
  {
-       return atomic_read(&error->reset_engine_count[engine->uabi_class]);
+       return atomic_read(&error->reset_engine_count[engine->class]);
+ }
+ static inline void
+ i915_increase_reset_engine_count(struct i915_gpu_error *error,
+                                const struct intel_engine_cs *engine)
+ {
+       atomic_inc(&error->reset_engine_count[engine->class]);
  }
  
  #define CORE_DUMP_FLAG_NONE           0x0
@@@ -277,14 -283,7 +285,7 @@@ static inline void intel_klog_error_cap
  
  __printf(2, 3)
  void i915_error_printf(struct drm_i915_error_state_buf *e, const char *f, ...);
- void intel_gpu_error_print_vma(struct drm_i915_error_state_buf *m,
-                              const struct intel_engine_cs *engine,
-                              const struct i915_vma_coredump *vma);
- struct i915_vma_coredump *
- intel_gpu_error_find_batch(const struct intel_engine_coredump *ee);
- struct i915_gpu_coredump *i915_gpu_coredump(struct intel_gt *gt,
-                                           intel_engine_mask_t engine_mask, u32 dump_flags);
  void i915_capture_error_state(struct intel_gt *gt,
                              intel_engine_mask_t engine_mask, u32 dump_flags);
  
@@@ -332,10 -331,13 +333,13 @@@ static inline void i915_gpu_coredump_pu
                kref_put(&gpu->ref, __i915_gpu_coredump_free);
  }
  
- struct i915_gpu_coredump *i915_first_error_state(struct drm_i915_private *i915);
  void i915_reset_error_state(struct drm_i915_private *i915);
  void i915_disable_error_state(struct drm_i915_private *i915, int err);
  
+ void i915_gpu_error_debugfs_register(struct drm_i915_private *i915);
+ void i915_gpu_error_sysfs_setup(struct drm_i915_private *i915);
+ void i915_gpu_error_sysfs_teardown(struct drm_i915_private *i915);
  #else
  
  __printf(2, 3)
@@@ -403,12 -405,6 +407,6 @@@ static inline void i915_gpu_coredump_pu
  {
  }
  
- static inline struct i915_gpu_coredump *
- i915_first_error_state(struct drm_i915_private *i915)
- {
-       return ERR_PTR(-ENODEV);
- }
  static inline void i915_reset_error_state(struct drm_i915_private *i915)
  {
  }
@@@ -418,6 -414,18 +416,18 @@@ static inline void i915_disable_error_s
  {
  }
  
+ static inline void i915_gpu_error_debugfs_register(struct drm_i915_private *i915)
+ {
+ }
+ static inline void i915_gpu_error_sysfs_setup(struct drm_i915_private *i915)
+ {
+ }
+ static inline void i915_gpu_error_sysfs_teardown(struct drm_i915_private *i915)
+ {
+ }
  #endif /* IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR) */
  
  #endif /* _I915_GPU_ERROR_H_ */
index 156cb1536b4738266c221d39fca78afe0126496f,91491111dbd5adc87f53234a3f5d8d5565f1e1da..860b51b56a92be4a9f99e50a8a1cc524e863d9c1
   * present for a given platform.
   */
  
 +static struct drm_i915_private *rpm_to_i915(struct intel_runtime_pm *rpm)
 +{
 +      return container_of(rpm, struct drm_i915_private, runtime_pm);
 +}
 +
  #if IS_ENABLED(CONFIG_DRM_I915_DEBUG_RUNTIME_PM)
  
- #include <linux/sort.h>
- #define STACKDEPTH 8
- static noinline depot_stack_handle_t __save_depot_stack(void)
- {
-       unsigned long entries[STACKDEPTH];
-       unsigned int n;
-       n = stack_trace_save(entries, ARRAY_SIZE(entries), 1);
-       return stack_depot_save(entries, n, GFP_NOWAIT | __GFP_NOWARN);
- }
  static void init_intel_runtime_pm_wakeref(struct intel_runtime_pm *rpm)
  {
-       spin_lock_init(&rpm->debug.lock);
-       stack_depot_init();
+       ref_tracker_dir_init(&rpm->debug, INTEL_REFTRACK_DEAD_COUNT, dev_name(rpm->kdev));
  }
  
- static noinline depot_stack_handle_t
+ static intel_wakeref_t
  track_intel_runtime_pm_wakeref(struct intel_runtime_pm *rpm)
  {
-       depot_stack_handle_t stack, *stacks;
-       unsigned long flags;
-       if (rpm->no_wakeref_tracking)
+       if (!rpm->available || rpm->no_wakeref_tracking)
                return -1;
  
-       stack = __save_depot_stack();
-       if (!stack)
-               return -1;
-       spin_lock_irqsave(&rpm->debug.lock, flags);
-       if (!rpm->debug.count)
-               rpm->debug.last_acquire = stack;
-       stacks = krealloc(rpm->debug.owners,
-                         (rpm->debug.count + 1) * sizeof(*stacks),
-                         GFP_NOWAIT | __GFP_NOWARN);
-       if (stacks) {
-               stacks[rpm->debug.count++] = stack;
-               rpm->debug.owners = stacks;
-       } else {
-               stack = -1;
-       }
-       spin_unlock_irqrestore(&rpm->debug.lock, flags);
-       return stack;
+       return intel_ref_tracker_alloc(&rpm->debug);
  }
  
  static void untrack_intel_runtime_pm_wakeref(struct intel_runtime_pm *rpm,
-                                            depot_stack_handle_t stack)
- {
-       struct drm_i915_private *i915 = container_of(rpm,
-                                                    struct drm_i915_private,
-                                                    runtime_pm);
-       unsigned long flags, n;
-       bool found = false;
-       if (unlikely(stack == -1))
-               return;
-       spin_lock_irqsave(&rpm->debug.lock, flags);
-       for (n = rpm->debug.count; n--; ) {
-               if (rpm->debug.owners[n] == stack) {
-                       memmove(rpm->debug.owners + n,
-                               rpm->debug.owners + n + 1,
-                               (--rpm->debug.count - n) * sizeof(stack));
-                       found = true;
-                       break;
-               }
-       }
-       spin_unlock_irqrestore(&rpm->debug.lock, flags);
-       if (drm_WARN(&i915->drm, !found,
-                    "Unmatched wakeref (tracking %lu), count %u\n",
-                    rpm->debug.count, atomic_read(&rpm->wakeref_count))) {
-               char *buf;
-               buf = kmalloc(PAGE_SIZE, GFP_NOWAIT | __GFP_NOWARN);
-               if (!buf)
-                       return;
-               stack_depot_snprint(stack, buf, PAGE_SIZE, 2);
-               DRM_DEBUG_DRIVER("wakeref %x from\n%s", stack, buf);
-               stack = READ_ONCE(rpm->debug.last_release);
-               if (stack) {
-                       stack_depot_snprint(stack, buf, PAGE_SIZE, 2);
-                       DRM_DEBUG_DRIVER("wakeref last released at\n%s", buf);
-               }
-               kfree(buf);
-       }
- }
- static int cmphandle(const void *_a, const void *_b)
- {
-       const depot_stack_handle_t * const a = _a, * const b = _b;
-       if (*a < *b)
-               return -1;
-       else if (*a > *b)
-               return 1;
-       else
-               return 0;
- }
- static void
- __print_intel_runtime_pm_wakeref(struct drm_printer *p,
-                                const struct intel_runtime_pm_debug *dbg)
+                                            intel_wakeref_t wakeref)
  {
-       unsigned long i;
-       char *buf;
-       buf = kmalloc(PAGE_SIZE, GFP_NOWAIT | __GFP_NOWARN);
-       if (!buf)
+       if (!rpm->available || rpm->no_wakeref_tracking)
                return;
  
-       if (dbg->last_acquire) {
-               stack_depot_snprint(dbg->last_acquire, buf, PAGE_SIZE, 2);
-               drm_printf(p, "Wakeref last acquired:\n%s", buf);
-       }
-       if (dbg->last_release) {
-               stack_depot_snprint(dbg->last_release, buf, PAGE_SIZE, 2);
-               drm_printf(p, "Wakeref last released:\n%s", buf);
-       }
-       drm_printf(p, "Wakeref count: %lu\n", dbg->count);
-       sort(dbg->owners, dbg->count, sizeof(*dbg->owners), cmphandle, NULL);
-       for (i = 0; i < dbg->count; i++) {
-               depot_stack_handle_t stack = dbg->owners[i];
-               unsigned long rep;
-               rep = 1;
-               while (i + 1 < dbg->count && dbg->owners[i + 1] == stack)
-                       rep++, i++;
-               stack_depot_snprint(stack, buf, PAGE_SIZE, 2);
-               drm_printf(p, "Wakeref x%lu taken at:\n%s", rep, buf);
-       }
-       kfree(buf);
- }
- static noinline void
- __untrack_all_wakerefs(struct intel_runtime_pm_debug *debug,
-                      struct intel_runtime_pm_debug *saved)
- {
-       *saved = *debug;
-       debug->owners = NULL;
-       debug->count = 0;
-       debug->last_release = __save_depot_stack();
+       intel_ref_tracker_free(&rpm->debug, wakeref);
  }
  
- static void
- dump_and_free_wakeref_tracking(struct intel_runtime_pm_debug *debug)
+ static void untrack_all_intel_runtime_pm_wakerefs(struct intel_runtime_pm *rpm)
  {
-       if (debug->count) {
-               struct drm_printer p = drm_debug_printer("i915");
-               __print_intel_runtime_pm_wakeref(&p, debug);
-       }
-       kfree(debug->owners);
+       ref_tracker_dir_exit(&rpm->debug);
  }
  
  static noinline void
  __intel_wakeref_dec_and_check_tracking(struct intel_runtime_pm *rpm)
  {
-       struct intel_runtime_pm_debug dbg = {};
        unsigned long flags;
  
        if (!atomic_dec_and_lock_irqsave(&rpm->wakeref_count,
                                         flags))
                return;
  
-       __untrack_all_wakerefs(&rpm->debug, &dbg);
+       ref_tracker_dir_print_locked(&rpm->debug, INTEL_REFTRACK_PRINT_LIMIT);
        spin_unlock_irqrestore(&rpm->debug.lock, flags);
-       dump_and_free_wakeref_tracking(&dbg);
- }
- static noinline void
- untrack_all_intel_runtime_pm_wakerefs(struct intel_runtime_pm *rpm)
- {
-       struct intel_runtime_pm_debug dbg = {};
-       unsigned long flags;
-       spin_lock_irqsave(&rpm->debug.lock, flags);
-       __untrack_all_wakerefs(&rpm->debug, &dbg);
-       spin_unlock_irqrestore(&rpm->debug.lock, flags);
-       dump_and_free_wakeref_tracking(&dbg);
  }
  
  void print_intel_runtime_pm_wakeref(struct intel_runtime_pm *rpm,
                                    struct drm_printer *p)
  {
-       struct intel_runtime_pm_debug dbg = {};
-       do {
-               unsigned long alloc = dbg.count;
-               depot_stack_handle_t *s;
-               spin_lock_irq(&rpm->debug.lock);
-               dbg.count = rpm->debug.count;
-               if (dbg.count <= alloc) {
-                       memcpy(dbg.owners,
-                              rpm->debug.owners,
-                              dbg.count * sizeof(*s));
-               }
-               dbg.last_acquire = rpm->debug.last_acquire;
-               dbg.last_release = rpm->debug.last_release;
-               spin_unlock_irq(&rpm->debug.lock);
-               if (dbg.count <= alloc)
-                       break;
-               s = krealloc(dbg.owners,
-                            dbg.count * sizeof(*s),
-                            GFP_NOWAIT | __GFP_NOWARN);
-               if (!s)
-                       goto out;
-               dbg.owners = s;
-       } while (1);
-       __print_intel_runtime_pm_wakeref(p, &dbg);
- out:
-       kfree(dbg.owners);
+       intel_ref_tracker_show(&rpm->debug, p);
  }
  
  #else
@@@ -302,14 -106,14 +111,14 @@@ static void init_intel_runtime_pm_waker
  {
  }
  
- static depot_stack_handle_t
+ static intel_wakeref_t
  track_intel_runtime_pm_wakeref(struct intel_runtime_pm *rpm)
  {
        return -1;
  }
  
  static void untrack_intel_runtime_pm_wakeref(struct intel_runtime_pm *rpm,
-                                            intel_wakeref_t wref)
+                                            intel_wakeref_t wakeref)
  {
  }
  
@@@ -354,7 -158,9 +163,7 @@@ intel_runtime_pm_release(struct intel_r
  static intel_wakeref_t __intel_runtime_pm_get(struct intel_runtime_pm *rpm,
                                              bool wakelock)
  {
 -      struct drm_i915_private *i915 = container_of(rpm,
 -                                                   struct drm_i915_private,
 -                                                   runtime_pm);
 +      struct drm_i915_private *i915 = rpm_to_i915(rpm);
        int ret;
  
        ret = pm_runtime_get_sync(rpm->kdev);
@@@ -559,7 -365,9 +368,7 @@@ void intel_runtime_pm_put(struct intel_
   */
  void intel_runtime_pm_enable(struct intel_runtime_pm *rpm)
  {
 -      struct drm_i915_private *i915 = container_of(rpm,
 -                                                   struct drm_i915_private,
 -                                                   runtime_pm);
 +      struct drm_i915_private *i915 = rpm_to_i915(rpm);
        struct device *kdev = rpm->kdev;
  
        /*
  
  void intel_runtime_pm_disable(struct intel_runtime_pm *rpm)
  {
 -      struct drm_i915_private *i915 = container_of(rpm,
 -                                                   struct drm_i915_private,
 -                                                   runtime_pm);
 +      struct drm_i915_private *i915 = rpm_to_i915(rpm);
        struct device *kdev = rpm->kdev;
  
        /* Transfer rpm ownership back to core */
  
  void intel_runtime_pm_driver_release(struct intel_runtime_pm *rpm)
  {
 -      struct drm_i915_private *i915 = container_of(rpm,
 -                                                   struct drm_i915_private,
 -                                                   runtime_pm);
 +      struct drm_i915_private *i915 = rpm_to_i915(rpm);
        int count = atomic_read(&rpm->wakeref_count);
  
        intel_wakeref_auto_fini(&rpm->userfault_wakeref);
                 "i915 raw-wakerefs=%d wakelocks=%d on cleanup\n",
                 intel_rpm_raw_wakeref_count(count),
                 intel_rpm_wakelock_count(count));
+ }
  
+ void intel_runtime_pm_driver_last_release(struct intel_runtime_pm *rpm)
+ {
+       intel_runtime_pm_driver_release(rpm);
        untrack_all_intel_runtime_pm_wakerefs(rpm);
  }
  
  void intel_runtime_pm_init_early(struct intel_runtime_pm *rpm)
  {
 -      struct drm_i915_private *i915 =
 -                      container_of(rpm, struct drm_i915_private, runtime_pm);
 +      struct drm_i915_private *i915 = rpm_to_i915(rpm);
        struct pci_dev *pdev = to_pci_dev(i915->drm.dev);
        struct device *kdev = &pdev->dev;
  
index be43614c73fdc59100fbfda43b69cb28afd89309,f3cb7d4c7000f18a1248c2882255e3a81b328e87..de3579d399e18dd71010dc89bcf99f38184ad6f2
@@@ -11,6 -11,8 +11,6 @@@
  
  #include "intel_wakeref.h"
  
 -#include "i915_utils.h"
 -
  struct device;
  struct drm_i915_private;
  struct drm_printer;
@@@ -75,15 -77,7 +75,7 @@@ struct intel_runtime_pm 
         * paired rpm_put) we can remove corresponding pairs of and keep
         * the array trimmed to active wakerefs.
         */
-       struct intel_runtime_pm_debug {
-               spinlock_t lock;
-               depot_stack_handle_t last_acquire;
-               depot_stack_handle_t last_release;
-               depot_stack_handle_t *owners;
-               unsigned long count;
-       } debug;
+       struct ref_tracker_dir debug;
  #endif
  };
  
@@@ -187,6 -181,7 +179,7 @@@ void intel_runtime_pm_init_early(struc
  void intel_runtime_pm_enable(struct intel_runtime_pm *rpm);
  void intel_runtime_pm_disable(struct intel_runtime_pm *rpm);
  void intel_runtime_pm_driver_release(struct intel_runtime_pm *rpm);
+ void intel_runtime_pm_driver_last_release(struct intel_runtime_pm *rpm);
  
  intel_wakeref_t intel_runtime_pm_get(struct intel_runtime_pm *rpm);
  intel_wakeref_t intel_runtime_pm_get_if_in_use(struct intel_runtime_pm *rpm);